# use ps for stool samples only
ps_full <- readRDS("/Users/rebecca/Documents/Forschung/IMMProveCF/R_analysis/rds_files/ps_IMProveCF_patientsAndControls_Run1-23_18102023.rds")
# subset per material
ps_full_stool <- subset_samples(ps_full, material== "Stool")
# remove zero abundances from each dataset
ps_stool <- tax_filter(ps_full_stool, min_prevalence = 0.3) # has to be prevalent in 30% of samples
## Proportional min_prevalence given: 0.3 --> min 76/251 samples.
# vs
# calculate relative abundances
ps_stool_relab <- transform_sample_counts(ps_stool, function(x) x/sum(x))
#ps_stool_relab <-prune_taxa(taxa_sums(ps_stool_relab) > 0.45, ps_stool_relab) # > 40% prevalence over all samples, 107 most abundant ASVs

ps_stool_relab_family <- tax_glom(ps_stool_relab, "Family")
ps_stool_relab_order <- tax_glom(ps_stool_relab, "Order")
ps_stool_relab_class <- tax_glom(ps_stool_relab, "Class")
ps_stool_relab_phylum<- tax_glom(ps_stool_relab, "Phylum")

ps_stool_relab <- tax_glom(ps_stool_relab, "Genus")

ASV/Genus level

psControlV2 <-
 phyloseq::subset_samples(ps_stool_relab , visit_cal_9  %in% c("Control", "2"))

metadata_ControlV2 <- as(sample_data(psControlV2),"data.frame")

metada <- metadata_ControlV2%>%
  mutate(visit_ControlV2 = (as.numeric(visit_cal_9))-1) # Control is assigned 1, CF timepoint 0

metada<- cbind(visit_ControlV2 =metada$visit_ControlV2, subset(metada,select = -c(visit_ControlV2))) # brings visit into the first column

metada <- metada%>%
  dplyr::select(visit_ControlV2)

# run metadeconfoundR
features <- as.data.frame(otu_table(psControlV2))
taxtable <- as.data.frame(tax_table(psControlV2))

#create two-column-dataframe containing corresponding "human-readable" names to the "machine-readable" feature names used as row.names in metaDeconfOutput.  
taxtable <- taxtable %>%
  dplyr::mutate(ASV_Genus = paste(row.names(.), Genus, sep = "_")) %>%
  dplyr::select(ASV_Genus = ASV_Genus, -Genus)
taxtable$ASV <- row.names(taxtable)

### run it
metad <- MetaDeconfound(featureMat = features, metaMat = metada,  nnodes=9) # if I compare Control vs CF, there are no repeated measures in this model , randomVar = list("+ (1|id)", c("id"))

raw_p_ControlV2 <- metad[1]
corr_p_ControlV2 <- metad[2]
effect_size_ControlV2 <- metad[3]
status_ControlV2 <- metad[4]
psControl_V3V5 <-
 phyloseq::subset_samples(ps_stool_relab , visit_cal_cor  %in% c(NA, "3", "4", "5"))

metadata_Control_V3V5 <- as(sample_data(psControl_V3V5),"data.frame")

metadata_Control_V3V5$visit_sum
##   [1] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [10] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [19] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [28] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [37] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [46] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [55] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [64] 3-5     3-5     3-5     Control Control Control Control Control Control
##  [73] Control Control Control Control Control Control 3-5     3-5     3-5    
##  [82] 3-5     3-5     3-5     Control Control Control Control Control Control
##  [91] Control Control Control Control Control Control Control 3-5     3-5    
## [100] 3-5     3-5     Control Control Control Control Control Control Control
## [109] Control Control Control Control Control Control Control Control Control
## [118] Control Control Control Control
## Levels: 3-5 Control
metada <- metadata_Control_V3V5%>%
  mutate(visit_Control_V3V5 = (as.numeric(visit_sum))-1)

metada<- cbind(visit_Control_V3V5 =metada$visit_Control_V3V5, subset(metada, select = -c(visit_Control_V3V5))) # brings visit into the first column

metada <- metada%>%
 dplyr::select(visit_Control_V3V5) # COntrols are assigned 1

# run metadeconfoundR
features <- as.data.frame(otu_table(psControl_V3V5))
taxtable <- as.data.frame(tax_table(psControl_V3V5))

#create two-column-dataframe containing corresponding "human-readable" names to the "machine-readable" feature names used as row.names in metaDeconfOutput.  
taxtable <- taxtable %>%
  mutate(ASV_Genus = paste(row.names(.), Genus, sep = "_")) %>%
  dplyr::select(ASV_Genus = ASV_Genus, -Genus)
taxtable$ASV <- row.names(taxtable)

### run it
metad <- MetaDeconfound(featureMat = features, metaMat = metada,  nnodes=9)#randomVar = list("+ (1|id)", c("id"))

raw_pControl_V3V5 <- metad[1]
corr_pControl_V3V5 <- metad[2]
effect_sizeControl_V3V5 <- metad[3]
statusControl_V3V5 <- metad[4]
psControl_V6V7 <-
 phyloseq::subset_samples(ps_stool_relab , visit_cal_cor  %in% c(NA, "6", "7"))

metadata_Control_V6V7 <- as(sample_data(psControl_V6V7),"data.frame")

metadata_Control_V6V7$visit_cal_cor
##  [1] 6    6    7    6    7    6    6    6    6    6    6    6    6    6    6   
## [16] 6    7    6    7    7    7    6    7    7    7    6    7    7    7    6   
## [31] 7    7    7    7    7    6    7    7    <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## [46] <NA> <NA> <NA> <NA> <NA> 7    <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## [61] <NA> <NA> <NA> <NA> 6    <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## [76] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## Levels: 6 7
metadata_Control_V6V7$visit_sum
##  [1] 6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7    
## [10] 6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7    
## [19] 6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7    
## [28] 6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7    
## [37] 6-7     6-7     Control Control Control Control Control Control Control
## [46] Control Control Control Control Control 6-7     Control Control Control
## [55] Control Control Control Control Control Control Control Control Control
## [64] Control 6-7     Control Control Control Control Control Control Control
## [73] Control Control Control Control Control Control Control Control Control
## [82] Control Control Control Control
## Levels: 6-7 Control
metada <- metadata_Control_V6V7%>%
  mutate(visit_Control_V6V7 = (as.numeric(visit_sum))-1)

metada<- cbind(visit_Control_V6V7 =metada$visit_Control_V6V7, subset(metada, select = -c(visit_Control_V6V7))) # brings visit into the first column

metada <- metada%>%
dplyr::select(visit_Control_V6V7)

# run metadeconfoundR
features <- as.data.frame(otu_table(psControl_V6V7))
taxtable <- as.data.frame(tax_table(psControl_V6V7))

#create two-column-dataframe containing corresponding "human-readable" names to the "machine-readable" feature names used as row.names in metaDeconfOutput.  
taxtable <- taxtable %>%
  mutate(ASV_Genus = paste(row.names(.), Genus, sep = "_")) %>%
  dplyr::select(ASV_Genus = ASV_Genus, -Genus)
taxtable$ASV <- row.names(taxtable)

### run it
metad <- MetaDeconfound(featureMat = features, metaMat = metada, nnodes=9)#, randomVar = list("+ (1|id)", c("id")), 

raw_p_Control_V6V7 <- metad[1]
corr_p_Control_V6V7 <- metad[2]
effect_size_Control_V6V7 <- metad[3]
status_Control_V6V7 <- metad[4]
psControl_V8V9 <-
 phyloseq::subset_samples(ps_stool_relab , visit_cal_9  %in% c("Control", "8", "9"))

metadata_Control_V8V9 <- as(sample_data(psControl_V8V9),"data.frame")

metada <- metadata_Control_V8V9%>%
  mutate(visitControl_V8V9 = (as.numeric(visit_sum))-1)

metada<- cbind(visitControl_V8V9 =metada$visitControl_V8V9, subset(metada, select = -c(visitControl_V8V9))) # brings visit into the first column

metada <- metada%>%
  dplyr::select(visitControl_V8V9)

# run metadeconfoundR
features <- as.data.frame(otu_table(psControl_V8V9))
taxtable <- as.data.frame(tax_table(psControl_V8V9))

#create two-column-dataframe containing corresponding "human-readable" names to the "machine-readable" feature names used as row.names in metaDeconfOutput.  
taxtable <- taxtable %>%
  mutate(ASV_Genus = paste(row.names(.), Genus, sep = "_")) %>%
  dplyr::select(ASV_Genus = ASV_Genus, -Genus)
taxtable$ASV <- row.names(taxtable)

metada$visitControl_V8V9
##  [1] 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [39] 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
### run it
metad <- MetaDeconfound(featureMat = features, metaMat = metada, nnodes=9) #, randomVar = list("+ (1|id)", c("id")),  

raw_p_Control_V8V9 <- metad[1]
corr_p_Control_V8V9 <- metad[2]
effect_size_Control_V8V9 <- metad[3]
status_Control_V8V9 <- metad[4]
psControl_V1 <-
 phyloseq::subset_samples(ps_stool_relab , visit_cal_9  %in% c("Control", "1"))

metadata_Control_V1 <- as(sample_data(psControl_V1),"data.frame")

metada <- metadata_Control_V1%>%
  mutate(visitControl_V1 = (as.numeric(visit_sum))-1)

metada<- cbind(visitControl_V1 =metada$visitControl_V1, subset(metada,select = -c(visitControl_V1))) # brings visit into the first column

metada <- metada%>%
  dplyr::select(visitControl_V1)

# run metadeconfoundR
features <- as.data.frame(otu_table(psControl_V1))
taxtable <- as.data.frame(tax_table(psControl_V1))

#create two-column-dataframe containing corresponding "human-readable" names to the "machine-readable" feature names used as row.names in metaDeconfOutput.  

taxtable <- taxtable %>%
  mutate(ASV_Genus = paste(row.names(.), Genus, sep = "_")) %>%
  dplyr::select(ASV_Genus = ASV_Genus, -Genus)
taxtable$ASV <- row.names(taxtable)

### run it
metad <- MetaDeconfound(featureMat = features, metaMat = metada,  nnodes=9)

raw_p_Control_V1 <- metad[1]
corr_p_Control_V1 <- metad[2]
effect_size_Control_V1 <- metad[3]
status_Control_V1 <- metad[4]
raw_p <- purrr::map2(raw_p_ControlV2,raw_pControl_V3V5,cbind)
raw_p <- purrr::map2(raw_p,raw_p_Control_V6V7,cbind)
raw_p <- purrr::map2(raw_p,raw_p_Control_V8V9,cbind)
raw_p <- purrr::map2(raw_p,raw_p_Control_V1,cbind)

raw_p_df <- bind_rows(raw_p)
raw_p_df <- data.frame(raw_p_df$Ps)
raw_p_df  <- raw_p_df %>%
  rownames_to_column()%>%
  mutate(p_Control_V2=visit_ControlV2)%>%
  mutate(p_Control_V3V5=visit_Control_V3V5)%>%
  mutate(p_Control_V6V7=visit_Control_V6V7)%>%
  mutate(p_Control_V8V9=visitControl_V8V9)%>%
  mutate(p_Control_V1=visitControl_V1)%>%
  mutate(ASV=rowname)#%>%
  ##dplyr::select(-c(1:10))

corr_p <- purrr::map2(corr_p_ControlV2,corr_pControl_V3V5,cbind)
corr_p <- purrr::map2(corr_p,corr_p_Control_V6V7,cbind)
corr_p <- purrr::map2(corr_p,corr_p_Control_V8V9,cbind)
corr_p <- purrr::map2(corr_p,corr_p_Control_V1,cbind)

corr_p_df <- bind_rows(corr_p)
corr_p_df <- data.frame(corr_p_df$Qs)
corr_p_df  <- corr_p_df %>%
  rownames_to_column()%>%
  mutate(q_Control_V2=visit_ControlV2)%>%
  mutate(q_Control_V3V5=visit_Control_V3V5)%>%
  mutate(q_Control_V6V7=visit_Control_V6V7)%>%
  mutate(q_Control_V8V9=visitControl_V8V9)%>%
  mutate(q_Control_V1=visitControl_V1)%>%
  mutate(ASV=rowname)#%>%
  ##dplyr::select(-c(1:10))

effect_size <- purrr::map2(effect_size_ControlV2,effect_sizeControl_V3V5,cbind)
effect_size <- purrr::map2(effect_size,effect_size_Control_V6V7,cbind)
effect_size <- purrr::map2(effect_size,effect_size_Control_V8V9,cbind)
effect_size <- purrr::map2(effect_size,effect_size_Control_V1,cbind)

effect_size_df <- bind_rows(effect_size)
effect_size_df <- data.frame(effect_size_df$Ds)
effect_size_df  <- effect_size_df %>%
  rownames_to_column()%>%
  mutate(d_Control_V2=visit_ControlV2)%>%
  mutate(d_Control_V3V5=visit_Control_V3V5)%>%
  mutate(d_Control_V6V7=visit_Control_V6V7)%>%
  mutate(d_Control_V8V9=visitControl_V8V9)%>%
  mutate(d_Control_V1=visitControl_V1)%>%
  mutate(ASV=rowname)#%>%
 # #dplyr::select(-c(1:10))


status <- purrr::map2(status_ControlV2,statusControl_V3V5,cbind)
status <- purrr::map2(status,status_Control_V6V7,cbind)
status <- purrr::map2(status,status_Control_V8V9,cbind)
status <- purrr::map2(status,status_Control_V1,cbind)

status_df <- bind_rows(status)
status_df <- data.frame(status_df$status)
status_df  <- status_df %>%
  rownames_to_column()%>%
  mutate(status_Control_V2=visit_ControlV2)%>%
  mutate(status_Control_V3V5=visit_Control_V3V5)%>%
  mutate(status_Control_V6V7=visit_Control_V6V7)%>%
  mutate(status_Control_V8V9=visitControl_V8V9)%>%
  mutate(status_Control_V1=visitControl_V1)%>%
  mutate(ASV=rowname)#%>%
 # #dplyr::select(-c(1:10))

effect_table <- raw_p_df%>%
  full_join(corr_p_df, by="ASV")%>%
  full_join(effect_size_df, by="ASV")%>%
  full_join(status_df, by="ASV")%>%
  full_join(taxtable, by="ASV")

effect_table_sig <- effect_table%>%
  filter(status_Control_V1=="OK_nc"|status_Control_V2=="OK_nc"|status_Control_V3V5=="OK_nc"|status_Control_V6V7 =="OK_nc" | status_Control_V8V9=="OK_nc")

#pivot long format

effect_table_sig_long <- effect_table_sig%>%
  pivot_longer(cols = starts_with("status"), names_to = "comparison_status", values_to = "status")%>%
  separate(comparison_status, c("variable","Control", "Visit_Sum"), "_")%>%
  mutate(comparison_status=paste(Control,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Control, Visit_Sum))%>%
  pivot_longer(cols = starts_with("p"), names_to = "comparison_p", values_to = "raw_p")%>%
  separate(comparison_p, c("variable","Control", "Visit_Sum"), "_")%>%
  mutate(comparison_p=paste(Control,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Control, Visit_Sum))%>%
  filter(comparison_p==comparison_status)%>%
  pivot_longer(cols = starts_with("d"), names_to = "comparison_effectSize", values_to = "effectSize")%>%
  separate(comparison_effectSize, c("variable","Control", "Visit_Sum"), "_")%>%
  mutate(comparison_effectSize=paste(Control,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Control, Visit_Sum))%>%
  filter(comparison_p==comparison_effectSize)%>%
  pivot_longer(cols = starts_with("q"), names_to = "comparison_q", values_to = "corr_p")%>%
  separate(comparison_q, c("variable","Control", "Visit_Sum"), "_")%>%
  mutate(comparison_q=paste(Control,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Control, Visit_Sum))%>%
  filter(comparison_p==comparison_q)%>%
  dplyr::select(-c(comparison_q, comparison_effectSize,comparison_status))%>%
  mutate(fdr= as_factor(case_when(corr_p <= 0.05 ~ "*", corr_p <= 0.01 ~ "**", corr_p <= 0.001 ~ "***", corr_p <= 0.1 ~ ".")))%>%
  mutate(taxa=paste(ASV_Genus, "(Genus)"))%>%
  dplyr::select(-ASV_Genus)
effect_table_sig_long$comparison_p <- factor(effect_table_sig_long$comparison_p, levels = c("Control_V1","Control_V2", "Control_V3V5","Control_V6V7","Control_V8V9"))

effect_table_sig_long%>%
  ggplot(aes (x = comparison_p, y = taxa))+ 
  geom_point (aes (fill = effectSize, shape = as.factor (sign (effectSize)), size = abs (effectSize), color=(fdr))) +
  scale_shape_manual (values = c (25, 24)) + 
  scale_fill_gradient2 (low = "blue", high = "red", mid = "white", midpoint = 0) +
  scale_color_manual(values=c("gray22","gray1","gray85"))+
  geom_text (aes (label = stars.pval (corr_p)))+
  theme_grey() +
  theme(axis.text.x = element_text(), #angle = 0, hjust = 0, vjust = 1
       legend.position = "right",axis.text.y = element_text(face = "italic"))+
  scale_x_discrete(labels=c("1","3", "6-12", "15-18", "21-24"))+
  labs(x= "months from treatment start")

# dplyr::select the entries which have OK_nc in status with all differences to Control

effect_table_sig_control <- effect_table%>%
  filter(status_Control_V2=="OK_nc"|status_Control_V3V5=="OK_nc"|status_Control_V6V7 =="OK_nc" | status_Control_V8V9=="OK_nc"| status_Control_V1=="OK_nc")

#pivot long format

effect_table_sig_long_control <- effect_table_sig_control%>%
  pivot_longer(cols = starts_with("status"), names_to = "comparison_status", values_to = "status")%>%
  separate(comparison_status, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_status=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  pivot_longer(cols = starts_with("p"), names_to = "comparison_p", values_to = "raw_p")%>%
  separate(comparison_p, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_p=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  filter(comparison_p==comparison_status)%>%
  pivot_longer(cols = starts_with("d"), names_to = "comparison_effectSize", values_to = "effectSize")%>%
  separate(comparison_effectSize, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_effectSize=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  filter(comparison_p==comparison_effectSize)%>%
  pivot_longer(cols = starts_with("q"), names_to = "comparison_q", values_to = "corr_p")%>%
  separate(comparison_q, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_q=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  filter(comparison_p==comparison_q)%>%
  dplyr::select(-c(comparison_q, comparison_effectSize,comparison_status))%>%
  mutate(fdr= as_factor(case_when(corr_p <= 0.05 ~ "*", corr_p <= 0.01 ~ "**", corr_p <= 0.001 ~ "***", corr_p <= 0.1 ~ ".")))%>%
  mutate(taxa=paste(ASV_Genus, "(Genus)"))%>%
  dplyr::select(-ASV_Genus)

effect_table_sig_long_control$comparison_p <- factor(effect_table_sig_long_control$comparison_p, levels = c("Control_V1","Control_V2", "Control_V3V5","Control_V6V7","Control_V8V9"))

effect_table_sig_long_control%>%
  mutate(shape_sign= as.factor(sign(effectSize))) %>% 
  filter(shape_sign!=0) %>% #fiter out effect sizes that have no direction
  ggplot(aes (x = comparison_p, y = taxa))+ 
  geom_point (aes (fill = effectSize, shape = shape_sign, size = abs (effectSize), color=(fdr)))+
  scale_shape_manual (values = c (25,24)) + 
  scale_fill_gradient2 (low = "blue", high = "red", mid = "white", midpoint = 0) +
  scale_color_manual(values=c("gray22","gray1","gray85"))+
  geom_text (aes (label = stars.pval (corr_p)))+
  theme_grey() +
  theme(axis.text.x = element_text(), #angle = 0, hjust = 0, vjust = 1
       legend.position = "right",axis.text.y = element_text(face = "italic"))+
  scale_x_discrete(labels=c("1", "3", "6-12", "15-18", "21-24"))+
  labs(x= "months from treatment start")

Family level

psControlV2 <-
 phyloseq::subset_samples(ps_stool_relab_family , visit_cal_9  %in% c("Control", "V2"))

metadata_ControlV2 <- as(sample_data(psControlV2),"data.frame")

metada <- metadata_ControlV2%>%
  mutate(visit_ControlV2 = (as.numeric(visit.x))-1)

metada<- cbind(visit_ControlV2 =metada$visit_ControlV2, subset(metada, select = -c(visit_ControlV2))) # brings visit into the first column

metada <- metada%>%
  dplyr::select(visit_ControlV2)

# run metadeconfoundR
features <- as.data.frame(otu_table(psControlV2))
taxtable <- as.data.frame(tax_table(psControlV2))

#create two-column-dataframe containing corresponding "human-readable" names to the "machine-readable" feature names used as row.names in metaDeconfOutput.  
taxtable <- taxtable %>%
  mutate(ASV_Family = paste(row.names(.), Family, sep = "_")) %>%
  dplyr::select(ASV_Family = ASV_Family, -Family)
taxtable$ASV <- row.names(taxtable)

### run it
metad <- MetaDeconfound(featureMat = features, metaMat = metada,   nnodes=9)#randomVar = list("+ (1|id)", c("id")),

raw_p_ControlV2 <- metad[1]
corr_p_ControlV2 <- metad[2]
effect_size_ControlV2 <- metad[3]
status_ControlV2 <- metad[4]
psControl_V3V5 <-
 phyloseq::subset_samples(ps_stool_relab_family , visit_cal_9  %in% c("Control", "3", "4", "5"))

metadata_Control_V3V5 <- as(sample_data(psControl_V3V5),"data.frame")

metadata_Control_V3V5$visit_cal_cor
##   [1] 3    4    5    3    4    3    3    4    5    3    4    3    4    5    3   
##  [16] 4    3    4    5    3    3    5    4    3    4    3    4    5    3    4   
##  [31] 3    3    4    3    4    5    3    4    5    3    4    3    4    5    4   
##  [46] 5    3    4    5    3    4    5    5    3    5    4    3    5    4    5   
##  [61] 5    5    4    5    3    3    <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
##  [76] <NA> <NA> <NA> 4    3    3    4    4    3    <NA> <NA> <NA> <NA> <NA> <NA>
##  [91] <NA> <NA> <NA> <NA> <NA> <NA> <NA> 5    5    5    4    <NA> <NA> <NA> <NA>
## [106] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## [121] <NA>
## Levels: 3 4 5
metadata_Control_V3V5$visit_sum
##   [1] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [10] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [19] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [28] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [37] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [46] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [55] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [64] 3-5     3-5     3-5     Control Control Control Control Control Control
##  [73] Control Control Control Control Control Control 3-5     3-5     3-5    
##  [82] 3-5     3-5     3-5     Control Control Control Control Control Control
##  [91] Control Control Control Control Control Control Control 3-5     3-5    
## [100] 3-5     3-5     Control Control Control Control Control Control Control
## [109] Control Control Control Control Control Control Control Control Control
## [118] Control Control Control Control
## Levels: 3-5 Control
metada <- metadata_Control_V3V5%>%
  mutate(visit_Control_V3V5 = (as.numeric(visit_sum))-1)

metada<- cbind(visit_Control_V3V5 =metada$visit_Control_V3V5, subset(metada,select = -c(visit_Control_V3V5))) # brings visit into the first column

metada <- metada%>%
  dplyr::select(visit_Control_V3V5)

# run metadeconfoundR
features <- as.data.frame(otu_table(psControl_V3V5))
taxtable <- as.data.frame(tax_table(psControl_V3V5))

#create two-column-dataframe containing corresponding "human-readable" names to the "machine-readable" feature names used as row.names in metaDeconfOutput.  
taxtable <- taxtable %>%
  mutate(ASV_Family = paste(row.names(.), Family, sep = "_")) %>%
  dplyr::select(ASV_Family = ASV_Family, -Family)
taxtable$ASV <- row.names(taxtable)

### run it
metad <- MetaDeconfound(featureMat = features, metaMat = metada,  nnodes=9)#randomVar = list("+ (1|id)", c("id"))

raw_pControl_V3V5 <- metad[1]
corr_pControl_V3V5 <- metad[2]
effect_sizeControl_V3V5 <- metad[3]
statusControl_V3V5 <- metad[4]
psControl_V6V7 <-
 phyloseq::subset_samples(ps_stool_relab_family , visit_cal_9  %in% c("Control", "6", "7"))

metadata_Control_V6V7 <- as(sample_data(psControl_V6V7),"data.frame")

metadata_Control_V6V7$visit_cal_cor
##  [1] 6    6    7    6    7    6    6    6    6    6    6    6    6    6    6   
## [16] 6    7    6    7    7    7    6    7    7    7    6    7    7    7    6   
## [31] 7    7    7    7    7    6    7    7    <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## [46] <NA> <NA> <NA> <NA> <NA> 7    <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## [61] <NA> <NA> <NA> <NA> 6    <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## [76] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## Levels: 6 7
metadata_Control_V6V7$visit_sum
##  [1] 6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7    
## [10] 6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7    
## [19] 6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7    
## [28] 6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7    
## [37] 6-7     6-7     Control Control Control Control Control Control Control
## [46] Control Control Control Control Control 6-7     Control Control Control
## [55] Control Control Control Control Control Control Control Control Control
## [64] Control 6-7     Control Control Control Control Control Control Control
## [73] Control Control Control Control Control Control Control Control Control
## [82] Control Control Control Control
## Levels: 6-7 Control
metada <- metadata_Control_V6V7%>%
  mutate(visit_Control_V6V7 = (as.numeric(visit_sum))-1)

metada<- cbind(visit_Control_V6V7 =metada$visit_Control_V6V7, subset(metada,select = -c(visit_Control_V6V7))) # brings visit into the first column

metada <- metada%>%
  dplyr::select(visit_Control_V6V7)

# run metadeconfoundR
features <- as.data.frame(otu_table(psControl_V6V7))
taxtable <- as.data.frame(tax_table(psControl_V6V7))

#create two-column-dataframe containing corresponding "human-readable" names to the "machine-readable" feature names used as row.names in metaDeconfOutput.  
taxtable <- taxtable %>%
  mutate(ASV_Family = paste(row.names(.), Family, sep = "_")) %>%
  dplyr::select(ASV_Family = ASV_Family, -Family)
taxtable$ASV <- row.names(taxtable)

### run it
metad <- MetaDeconfound(featureMat = features, metaMat = metada,  nnodes=9)#, randomVar = list("+ (1|id)", c("id")),

raw_p_Control_V6V7 <- metad[1]
corr_p_Control_V6V7 <- metad[2]
effect_size_Control_V6V7 <- metad[3]
status_Control_V6V7 <- metad[4]
psControl_V8V9 <-
 phyloseq::subset_samples(ps_stool_relab_family, visit_cal_cor  %in% c(NA, "8", "9"))

metadata_Control_V8V9 <- as(sample_data(psControl_V8V9),"data.frame")

metadata_Control_V8V9$visit_cal_cor
##  [1] 8    8    8    8    8    8    8    8    8    8    8    <NA> <NA> <NA> <NA>
## [16] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> 9    9    8    9    9    8    9   
## [31] 9    8    9    9    9    9    8    8    9    9    <NA> <NA> <NA> <NA> <NA>
## [46] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> 9    <NA> <NA> <NA> <NA> <NA> <NA>
## [61] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## Levels: 8 9
metadata_Control_V8V9$visit_sum
##  [1] 8-10    8-10    8-10    8-10    8-10    8-10    8-10    8-10    8-10   
## [10] 8-10    8-10    Control Control Control Control Control Control Control
## [19] Control Control Control Control Control 8-10    8-10    8-10    8-10   
## [28] 8-10    8-10    8-10    8-10    8-10    8-10    8-10    8-10    8-10   
## [37] 8-10    8-10    8-10    8-10    Control Control Control Control Control
## [46] Control Control Control Control Control Control Control Control 8-10   
## [55] Control Control Control Control Control Control Control Control Control
## [64] Control Control Control Control Control Control Control Control Control
## [73] Control Control
## Levels: 8-10 Control
metada <- metadata_Control_V8V9%>%
  mutate(visitControl_V8V9 = (as.numeric(visit_sum))-1)

metada<- cbind(visitControl_V8V9 =metada$visitControl_V8V9, subset(metada,select = -c(visitControl_V8V9))) # brings visit into the first column

metada <- metada%>%
  dplyr::select(visitControl_V8V9, id)

# run metadeconfoundR
features <- as.data.frame(otu_table(psControl_V8V9))
taxtable <- as.data.frame(tax_table(psControl_V8V9))

#create two-column-dataframe containing corresponding "human-readable" names to the "machine-readable" feature names used as row.names in metaDeconfOutput.  
taxtable <- taxtable %>%
  mutate(ASV_Family = paste(row.names(.), Family, sep = "_")) %>%
  dplyr::select(ASV_Family = ASV_Family, -Family)
taxtable$ASV <- row.names(taxtable)

### run it
metad <- MetaDeconfound(featureMat = features, metaMat = metada,   nnodes=9)

#randomVar = list("+ (1|id)", c("id")),
raw_p_Control_V8V9 <- metad[1]
corr_p_Control_V8V9 <- metad[2]
effect_size_Control_V8V9 <- metad[3]
status_Control_V8V9 <- metad[4]
psControl_V1 <-
 phyloseq::subset_samples(ps_stool_relab_family , visit_cal_9  %in% c("Control", "1"))

metadata_Control_Control <- as(sample_data(psControl_V1),"data.frame")

metada <- metadata_Control_Control%>%
  mutate(visitControl_V1 = (as.numeric(visit_sum))-1)

metada<- cbind(visitControl_V1 =metada$visitControl_V1, subset(metada,select = -c(visitControl_V1))) # brings visit into the first column

metada <- metada%>%
  dplyr::select(visitControl_V1)

# run metadeconfoundR
features <- as.data.frame(otu_table(psControl_V1))
taxtable <- as.data.frame(tax_table(psControl_V1))

#create two-column-dataframe containing corresponding "human-readable" names to the "machine-readable" feature names used as row.names in metaDeconfOutput.  
taxtable <- taxtable %>%
  mutate(ASV_Family = paste(row.names(.), Family, sep = "_")) %>%
  dplyr::select(ASV_Family = ASV_Family, -Family)
taxtable$ASV <- row.names(taxtable)

### run it
metad <- MetaDeconfound(featureMat = features, metaMat = metada,  nnodes=9)

raw_p_Control_V1 <- metad[1]
corr_p_Control_V1 <- metad[2]
effect_size_Control_V1 <- metad[3]
status_Control_V1 <- metad[4]
raw_p <- purrr::map2(raw_p_ControlV2,raw_pControl_V3V5,cbind)
raw_p <- purrr::map2(raw_p,raw_p_Control_V6V7,cbind)
raw_p <- purrr::map2(raw_p,raw_p_Control_V8V9,cbind)
raw_p <- purrr::map2(raw_p,raw_p_Control_V1,cbind)

raw_p_df <- bind_rows(raw_p)
raw_p_df <- data.frame(raw_p_df$Ps)
raw_p_df  <- raw_p_df %>%
  rownames_to_column()%>%
  mutate(p_Control_V2=visit_ControlV2)%>%
  mutate(p_Control_V3V5=visit_Control_V3V5)%>%
  mutate(p_Control_V6V7=visit_Control_V6V7)%>%
  mutate(p_Control_V8V9=visitControl_V8V9)%>%
  mutate(p_Control_V1=visitControl_V1)%>%
  mutate(ASV=rowname)#%>%
  #dplyr::select(-c(1:10))

corr_p <- purrr::map2(corr_p_ControlV2,corr_pControl_V3V5,cbind)
corr_p <- purrr::map2(corr_p,corr_p_Control_V6V7,cbind)
corr_p <- purrr::map2(corr_p,corr_p_Control_V8V9,cbind)
corr_p <- purrr::map2(corr_p,corr_p_Control_V1,cbind)

corr_p_df <- bind_rows(corr_p)
corr_p_df <- data.frame(corr_p_df$Qs)
corr_p_df  <- corr_p_df %>%
  rownames_to_column()%>%
  mutate(q_Control_V2=visit_ControlV2)%>%
  mutate(q_Control_V3V5=visit_Control_V3V5)%>%
  mutate(q_Control_V6V7=visit_Control_V6V7)%>%
  mutate(q_Control_V8V9=visitControl_V8V9)%>%
  mutate(q_Control_V1=visitControl_V1)%>%
  mutate(ASV=rowname)#%>%
  #dplyr::select(-c(1:10))

effect_size <- purrr::map2(effect_size_ControlV2,effect_sizeControl_V3V5,cbind)
effect_size <- purrr::map2(effect_size,effect_size_Control_V6V7,cbind)
effect_size <- purrr::map2(effect_size,effect_size_Control_V8V9,cbind)
effect_size <- purrr::map2(effect_size,effect_size_Control_V1,cbind)

effect_size_df <- bind_rows(effect_size)
effect_size_df <- data.frame(effect_size_df$Ds)
effect_size_df  <- effect_size_df %>%
  rownames_to_column()%>%
  mutate(d_Control_V2=visit_ControlV2)%>%
  mutate(d_Control_V3V5=visit_Control_V3V5)%>%
  mutate(d_Control_V6V7=visit_Control_V6V7)%>%
  mutate(d_Control_V8V9=visitControl_V8V9)%>%
  mutate(d_Control_V1=visitControl_V1)%>%
  mutate(ASV=rowname)#%>%
  #dplyr::select(-c(1:10))

status <- purrr::map2(status_ControlV2,statusControl_V3V5,cbind)
status <- purrr::map2(status,status_Control_V6V7,cbind)
status <- purrr::map2(status,status_Control_V8V9,cbind)
status <- purrr::map2(status,status_Control_V1,cbind)

status_df <- bind_rows(status)
status_df <- data.frame(status_df$status)
status_df  <- status_df %>%
  rownames_to_column()%>%
  mutate(status_Control_V2=visit_ControlV2)%>%
  mutate(status_Control_V3V5=visit_Control_V3V5)%>%
  mutate(status_Control_V6V7=visit_Control_V6V7)%>%
  mutate(status_Control_V8V9=visitControl_V8V9)%>%
  mutate(status_Control_V1=visitControl_V1)%>%
  mutate(ASV=rowname)#%>%
  #dplyr::select(-c(1:10))

effect_table_family <- raw_p_df%>%
  full_join(corr_p_df, by="ASV")%>%
  full_join(effect_size_df, by="ASV")%>%
  full_join(status_df, by="ASV")%>%
  full_join(taxtable, by="ASV")

# dplyr::select the entries which have OK_nc in status
effect_table_sig_family <- effect_table_family%>%
  filter(status_Control_V2=="OK_nc"|status_Control_V3V5=="OK_nc"|status_Control_V6V7 =="OK_nc" | status_Control_V8V9=="OK_nc")

#pivot long format

effect_table_sig_long_family <- effect_table_sig_family%>%
  pivot_longer(cols = starts_with("status"), names_to = "comparison_status", values_to = "status")%>%
  separate(comparison_status, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_status=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  pivot_longer(cols = starts_with("p"), names_to = "comparison_p", values_to = "raw_p")%>%
  separate(comparison_p, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_p=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  filter(comparison_p==comparison_status)%>%
  pivot_longer(cols = starts_with("d"), names_to = "comparison_effectSize", values_to = "effectSize")%>%
  separate(comparison_effectSize, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_effectSize=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  filter(comparison_p==comparison_effectSize)%>%
  pivot_longer(cols = starts_with("q"), names_to = "comparison_q", values_to = "corr_p")%>%
  separate(comparison_q, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_q=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  filter(comparison_p==comparison_q)%>%
  dplyr::select(-c(comparison_q, comparison_effectSize,comparison_status))%>%
  mutate(fdr= as_factor(case_when(corr_p <= 0.05 ~ "*", corr_p <= 0.01 ~ "**", corr_p <= 0.001 ~ "***", corr_p <= 0.1 ~ ".")))%>%
  mutate(taxa=paste(ASV_Family, "(Family)"))%>%
  dplyr::select(-ASV_Family)
effect_table_sig_long_family$comparison_p <- factor(effect_table_sig_long_family$comparison_p, levels = c("Control_V1"," Control_V2", "Control_V3V5","Control_V6V7","Control_V8V9"))

effect_table_sig_long_family%>%
  ggplot(aes (x = comparison_p, y = taxa))+ 
  geom_point (aes (fill = effectSize, shape = as.factor (sign (effectSize)), size = abs (effectSize), color=(fdr))) +
  scale_shape_manual (values = c (25, 24)) + 
  scale_fill_gradient2 (low = "blue", high = "red", mid = "white", midpoint = 0) +
  scale_color_manual(values=c("gray22","gray1","gray85"))+
  geom_text (aes (label = stars.pval (corr_p)))+
  theme_grey() +
  theme(axis.text.x = element_text(), #angle = 0, hjust = 0, vjust = 1
       legend.position = "right",axis.text.y = element_text(face = "italic"))+
  scale_x_discrete(labels=c("1", "3", "6-12", "15-18", "21-24"))+
  labs(x= "months from treatment start")
# dplyr::select the entries which have OK_nc in status with all differences to Control

effect_table_sig_control <- effect_table_family%>%
  filter(status_Control_V2=="OK_nc"|status_Control_V3V5=="OK_nc"|status_Control_V6V7 =="OK_nc" | status_Control_V8V9=="OK_nc"| status_Control_V1=="OK_nc")

#pivot long format

effect_table_sig_long_control_family <- effect_table_sig_control%>%
  pivot_longer(cols = starts_with("status"), names_to = "comparison_status", values_to = "status")%>%
  separate(comparison_status, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_status=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  pivot_longer(cols = starts_with("p"), names_to = "comparison_p", values_to = "raw_p")%>%
  separate(comparison_p, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_p=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  filter(comparison_p==comparison_status)%>%
  pivot_longer(cols = starts_with("d"), names_to = "comparison_effectSize", values_to = "effectSize")%>%
  separate(comparison_effectSize, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_effectSize=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  filter(comparison_p==comparison_effectSize)%>%
  pivot_longer(cols = starts_with("q"), names_to = "comparison_q", values_to = "corr_p")%>%
  separate(comparison_q, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_q=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  filter(comparison_p==comparison_q)%>%
  dplyr::select(-c(comparison_q, comparison_effectSize,comparison_status))%>%
  mutate(fdr= as_factor(case_when(corr_p <= 0.05 ~ "*", corr_p <= 0.01 ~ "**", corr_p <= 0.001 ~ "***", corr_p <= 0.1 ~ ".")))%>%
  mutate(taxa=paste(ASV_Family, "(Family)"))%>%
  dplyr::select(-ASV_Family)

effect_table_sig_long_control_family$comparison_p <- factor(effect_table_sig_long_control_family$comparison_p, levels = c("Control_V1","Control_V2", "Control_V3V5","Control_V6V7","Control_V8V9"))

effect_table_sig_long_control_family%>%
  mutate(shape_sign= as.factor(sign(effectSize))) %>% 
  filter(shape_sign!=0) %>% #fiter out effect sizes that have no direction
  ggplot(aes (x = comparison_p, y = taxa))+ 
  geom_point (aes (fill = effectSize, shape = shape_sign, size = abs (effectSize), color=(fdr)))+
  scale_shape_manual (values = c (25,24)) + 
  scale_fill_gradient2 (low = "blue", high = "red", mid = "white", midpoint = 0) +
  scale_color_manual(values=c("gray22","gray1","gray85"))+
  geom_text (aes (label = stars.pval (corr_p)))+
  theme_grey() +
  theme(axis.text.x = element_text(), #angle = 0, hjust = 0, vjust = 1
       legend.position = "right",axis.text.y = element_text(face = "italic"))+
  scale_x_discrete(labels=c("3", "6-12", "15-18", "21-24", "Control"))+
  labs(x= "months from treatment start")

Order level

psControlV2 <-
 phyloseq::subset_samples(ps_stool_relab_order , visit_cal_cor  %in% c(NA, "2"))

metadata_ControlV2 <- as(sample_data(psControlV2),"data.frame")

metada <- metadata_ControlV2%>%
  mutate(visit_ControlV2 = (as.numeric(visit.x))-1) %>% 
  mutate(visit_ControlV2 = case_when(is.na(visit_ControlV2)~1, T~0))
metada$visit_ControlV2
##  [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1
## [39] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
metada<- cbind(visit_ControlV2 =metada$visit_ControlV2, subset(metada, select = -c(visit_ControlV2))) # brings visit into the first column

metada <- metada%>%
  dplyr::select(visit_ControlV2)

# run metadeconfoundR
features <- as.data.frame(otu_table(psControlV2))
taxtable <- as.data.frame(tax_table(psControlV2))

#create two-column-dataframe containing corresponding "human-readable" names to the "machine-readable" feature names used as row.names in metaDeconfOutput.  
taxtable <- taxtable%>%
  dplyr::select(Order)
taxtable$ASV <- row.names(taxtable)
taxtable<- cbind(ASV=taxtable$ASV,subset(taxtable, select = -c(ASV)))

### run it
metad <- MetaDeconfound(featureMat = features, metaMat = metada,  nnodes=9)#, randomVar = list("+ (1|id)", c("id")),

raw_p_ControlV2 <- metad[1]
corr_p_ControlV2 <- metad[2]
effect_size_ControlV2 <- metad[3]
status_ControlV2 <- metad[4]
psControl_V3V5 <-
 phyloseq::subset_samples(ps_stool_relab_order , visit_cal_cor  %in% c(NA, "3", "4", "5"))

metadata_Control_V3V5 <- as(sample_data(psControl_V3V5),"data.frame")

metadata_Control_V3V5$visit_cal_cor
##   [1] 3    4    5    3    4    3    3    4    5    3    4    3    4    5    3   
##  [16] 4    3    4    5    3    3    5    4    3    4    3    4    5    3    4   
##  [31] 3    3    4    3    4    5    3    4    5    3    4    3    4    5    4   
##  [46] 5    3    4    5    3    4    5    5    3    5    4    3    5    4    5   
##  [61] 5    5    4    5    3    3    <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
##  [76] <NA> <NA> <NA> 4    3    3    4    4    3    <NA> <NA> <NA> <NA> <NA> <NA>
##  [91] <NA> <NA> <NA> <NA> <NA> <NA> <NA> 5    5    5    4    <NA> <NA> <NA> <NA>
## [106] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## [121] <NA>
## Levels: 3 4 5
metadata_Control_V3V5$visit_sum
##   [1] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [10] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [19] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [28] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [37] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [46] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [55] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [64] 3-5     3-5     3-5     Control Control Control Control Control Control
##  [73] Control Control Control Control Control Control 3-5     3-5     3-5    
##  [82] 3-5     3-5     3-5     Control Control Control Control Control Control
##  [91] Control Control Control Control Control Control Control 3-5     3-5    
## [100] 3-5     3-5     Control Control Control Control Control Control Control
## [109] Control Control Control Control Control Control Control Control Control
## [118] Control Control Control Control
## Levels: 3-5 Control
metada <- metadata_Control_V3V5%>%
  mutate(visit_Control_V3V5 = (as.numeric(visit_sum))-1)

metada<- cbind(visit_Control_V3V5 =metada$visit_Control_V3V5, subset(metada, select = -c(visit_Control_V3V5))) # brings visit into the first column

metada <- metada%>%
  dplyr::select(visit_Control_V3V5)

# run metadeconfoundR
features <- as.data.frame(otu_table(psControl_V3V5))
taxtable <- as.data.frame(tax_table(psControl_V3V5))

#create two-column-dataframe containing corresponding "human-readable" names to the "machine-readable" feature names used as row.names in metaDeconfOutput.  
taxtable <- taxtable%>%
  dplyr::select(Order)
taxtable$ASV <- row.names(taxtable)
taxtable<- cbind(ASV=taxtable$ASV,subset(taxtable, select = -c(ASV)))

### run it
metad <- MetaDeconfound(featureMat = features, metaMat = metada, nnodes=9) #randomVar = list("+ (1|id)", c("id")), 

raw_pControl_V3V5 <- metad[1]
corr_pControl_V3V5 <- metad[2]
effect_sizeControl_V3V5 <- metad[3]
statusControl_V3V5 <- metad[4]
psControl_V6V7 <-
 phyloseq::subset_samples(ps_stool_relab_order , visit_cal_cor  %in% c(NA, "6", "7"))

metadata_Control_V6V7 <- as(sample_data(psControl_V6V7),"data.frame")

metadata_Control_V6V7$visit_cal_cor
##  [1] 6    6    7    6    7    6    6    6    6    6    6    6    6    6    6   
## [16] 6    7    6    7    7    7    6    7    7    7    6    7    7    7    6   
## [31] 7    7    7    7    7    6    7    7    <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## [46] <NA> <NA> <NA> <NA> <NA> 7    <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## [61] <NA> <NA> <NA> <NA> 6    <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## [76] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## Levels: 6 7
metadata_Control_V6V7$visit_sum
##  [1] 6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7    
## [10] 6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7    
## [19] 6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7    
## [28] 6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7    
## [37] 6-7     6-7     Control Control Control Control Control Control Control
## [46] Control Control Control Control Control 6-7     Control Control Control
## [55] Control Control Control Control Control Control Control Control Control
## [64] Control 6-7     Control Control Control Control Control Control Control
## [73] Control Control Control Control Control Control Control Control Control
## [82] Control Control Control Control
## Levels: 6-7 Control
metada <- metadata_Control_V6V7%>%
  mutate(visit_Control_V6V7 = (as.numeric(visit_sum))-1)

metada<- cbind(visit_Control_V6V7 =metada$visit_Control_V6V7, subset(metada, select = -c(visit_Control_V6V7))) # brings visit into the first column

metada <- metada%>%
  dplyr::select(visit_Control_V6V7)

# run metadeconfoundR
features <- as.data.frame(otu_table(psControl_V6V7))
taxtable <- as.data.frame(tax_table(psControl_V6V7))

#create two-column-dataframe containing corresponding "human-readable" names to the "machine-readable" feature names used as row.names in metaDeconfOutput.  
taxtable <- taxtable%>%
  dplyr::select(Order)
taxtable$ASV <- row.names(taxtable)
taxtable<- cbind(ASV=taxtable$ASV,subset(taxtable, select = -c(ASV)))

### run it
metad <- MetaDeconfound(featureMat = features, metaMat = metada,  nnodes=9) #randomVar = list("+ (1|id)", c("id")), 

raw_p_Control_V6V7 <- metad[1]
corr_p_Control_V6V7 <- metad[2]
effect_size_Control_V6V7 <- metad[3]
status_Control_V6V7 <- metad[4]
psControl_V8V9 <-
 phyloseq::subset_samples(ps_stool_relab_order, visit_cal_9  %in% c("Control", "8", "9"))

metadata_Control_V8V9 <- as(sample_data(psControl_V8V9),"data.frame")

metadata_Control_V8V9$visit_cal_cor
##  [1] 8    8    8    8    8    8    8    8    8    8    8    <NA> <NA> <NA> <NA>
## [16] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> 9    10   9    8    9    9    8   
## [31] 9    9    8    9    9    9    9    8    8    9    9    <NA> <NA> <NA> <NA>
## [46] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> 9    <NA> <NA> <NA> <NA> <NA>
## [61] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## Levels: 8 9 10
metadata_Control_V8V9$visit_sum
##  [1] 8-10    8-10    8-10    8-10    8-10    8-10    8-10    8-10    8-10   
## [10] 8-10    8-10    Control Control Control Control Control Control Control
## [19] Control Control Control Control Control 8-10    8-10    8-10    8-10   
## [28] 8-10    8-10    8-10    8-10    8-10    8-10    8-10    8-10    8-10   
## [37] 8-10    8-10    8-10    8-10    8-10    Control Control Control Control
## [46] Control Control Control Control Control Control Control Control Control
## [55] 8-10    Control Control Control Control Control Control Control Control
## [64] Control Control Control Control Control Control Control Control Control
## [73] Control Control Control
## Levels: 8-10 Control
metada <- metadata_Control_V8V9%>%
  mutate(visitControl_V8V9 = (as.numeric(visit_sum))-1)

metada<- cbind(visitControl_V8V9 =metada$visitControl_V8V9, subset(metada, select = -c(visitControl_V8V9))) # brings visit into the first column

metada <- metada%>%
  dplyr::select(visitControl_V8V9)

# run metadeconfoundR
features <- as.data.frame(otu_table(psControl_V8V9))
taxtable <- as.data.frame(tax_table(psControl_V8V9))

#create two-column-dataframe containing corresponding "human-readable" names to the "machine-readable" feature names used as row.names in metaDeconfOutput.  
taxtable <- taxtable%>%
  dplyr::select(Order)
taxtable$ASV <- row.names(taxtable)
taxtable<- cbind(ASV=taxtable$ASV,subset(taxtable, select = -c(ASV)))

### run it
metad <- MetaDeconfound(featureMat = features, metaMat = metada,  nnodes=9)#randomVar = list("+ (1|id)", c("id")), 

raw_p_Control_V8V9 <- metad[1]
corr_p_Control_V8V9 <- metad[2]
effect_size_Control_V8V9 <- metad[3]
status_Control_V8V9 <- metad[4]
psControl_V1 <-
 phyloseq::subset_samples(ps_stool_relab_order , visit_cal_9  %in% c("Control", "1"))

metadata_Control_Control <- as(sample_data(psControl_V1),"data.frame")

metada <- metadata_Control_Control%>%
  mutate(visitControl_V1 = (as.numeric(visit_sum))-1)

metada<- cbind(visitControl_V1 =metada$visitControl_V1, subset(metada, select = -c(visitControl_V1))) # brings visit into the first column

metada <- metada%>%
  dplyr::select(visitControl_V1)

# run metadeconfoundR
features <- as.data.frame(otu_table(psControl_V1))
taxtable <- as.data.frame(tax_table(psControl_V1))

#create two-column-dataframe containing corresponding "human-readable" names to the "machine-readable" feature names used as row.names in metaDeconfOutput.  
taxtable <- taxtable%>%
  dplyr::select(Order)
taxtable$ASV <- row.names(taxtable)
taxtable<- cbind(ASV=taxtable$ASV,subset(taxtable, select = -c(ASV)))

### run it
metad <- MetaDeconfound(featureMat = features, metaMat = metada,  nnodes=9)

raw_p_Control_V1 <- metad[1]
corr_p_Control_V1 <- metad[2]
effect_size_Control_V1 <- metad[3]
status_Control_V1 <- metad[4]
raw_p <- purrr::map2(raw_p_ControlV2,raw_pControl_V3V5,cbind)
raw_p <- purrr::map2(raw_p,raw_p_Control_V6V7,cbind)
raw_p <- purrr::map2(raw_p,raw_p_Control_V8V9,cbind)
raw_p <- purrr::map2(raw_p,raw_p_Control_V1,cbind)

raw_p_df <- bind_rows(raw_p)
raw_p_df <- data.frame(raw_p_df$Ps)
raw_p_df  <- raw_p_df %>%
  rownames_to_column()%>%
  mutate(p_Control_V2=visit_ControlV2)%>%
  mutate(p_Control_V3V5=visit_Control_V3V5)%>%
  mutate(p_Control_V6V7=visit_Control_V6V7)%>%
  mutate(p_Control_V8V9=visitControl_V8V9)%>%
  mutate(p_Control_V1=visitControl_V1)%>%
  mutate(ASV=rowname)#%>%
  #dplyr::select(-c(1:10))

corr_p <- purrr::map2(corr_p_ControlV2,corr_pControl_V3V5,cbind)
corr_p <- purrr::map2(corr_p,corr_p_Control_V6V7,cbind)
corr_p <- purrr::map2(corr_p,corr_p_Control_V8V9,cbind)
corr_p <- purrr::map2(corr_p,corr_p_Control_V1,cbind)

corr_p_df <- bind_rows(corr_p)
corr_p_df <- data.frame(corr_p_df$Qs)
corr_p_df  <- corr_p_df %>%
  rownames_to_column()%>%
  mutate(q_Control_V2=visit_ControlV2)%>%
  mutate(q_Control_V3V5=visit_Control_V3V5)%>%
  mutate(q_Control_V6V7=visit_Control_V6V7)%>%
  mutate(q_Control_V8V9=visitControl_V8V9)%>%
  mutate(q_Control_V1=visitControl_V1)%>%
  mutate(ASV=rowname)#%>%
  #dplyr::select(-c(1:10))

effect_size <- purrr::map2(effect_size_ControlV2,effect_sizeControl_V3V5,cbind)
effect_size <- purrr::map2(effect_size,effect_size_Control_V6V7,cbind)
effect_size <- purrr::map2(effect_size,effect_size_Control_V8V9,cbind)
effect_size <- purrr::map2(effect_size,effect_size_Control_V1,cbind)

effect_size_df <- bind_rows(effect_size)
effect_size_df <- data.frame(effect_size_df$Ds)
effect_size_df  <- effect_size_df %>%
  rownames_to_column()%>%
  mutate(d_Control_V2=visit_ControlV2)%>%
  mutate(d_Control_V3V5=visit_Control_V3V5)%>%
  mutate(d_Control_V6V7=visit_Control_V6V7)%>%
  mutate(d_Control_V8V9=visitControl_V8V9)%>%
  mutate(d_Control_V1=visitControl_V1)%>%
  mutate(ASV=rowname)#%>%
  #dplyr::select(-c(1:10))

status <- purrr::map2(status_ControlV2,statusControl_V3V5,cbind)
status <- purrr::map2(status,status_Control_V6V7,cbind)
status <- purrr::map2(status,status_Control_V8V9,cbind)
status <- purrr::map2(status,status_Control_V1,cbind)

status_df <- bind_rows(status)
status_df <- data.frame(status_df$status)
status_df  <- status_df %>%
  rownames_to_column()%>%
  mutate(status_Control_V2=visit_ControlV2)%>%
  mutate(status_Control_V3V5=visit_Control_V3V5)%>%
  mutate(status_Control_V6V7=visit_Control_V6V7)%>%
  mutate(status_Control_V8V9=visitControl_V8V9)%>%
  mutate(status_Control_V1=visitControl_V1)%>%
  mutate(ASV=rowname)#%>%
  #dplyr::select(-c(1:10))

effect_table_order <- raw_p_df%>%
  full_join(corr_p_df, by="ASV")%>%
  full_join(effect_size_df, by="ASV")%>%
  full_join(status_df, by="ASV")%>%
  full_join(taxtable, by="ASV")

# dplyr::select the entries which have OK_nc in status
effect_table_sig_order <- effect_table_order%>%
  filter(status_Control_V1=="OK_nc"| status_Control_V2=="OK_nc"|status_Control_V3V5=="OK_nc"|status_Control_V6V7 =="OK_nc" | status_Control_V8V9=="OK_nc")

#pivot long format

effect_table_sig_long_order <- effect_table_sig_order%>%
  pivot_longer(cols = starts_with("status"), names_to = "comparison_status", values_to = "status")%>%
  separate(comparison_status, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_status=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  pivot_longer(cols = starts_with("p"), names_to = "comparison_p", values_to = "raw_p")%>%
  separate(comparison_p, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_p=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  filter(comparison_p==comparison_status)%>%
  pivot_longer(cols = starts_with("d"), names_to = "comparison_effectSize", values_to = "effectSize")%>%
  separate(comparison_effectSize, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_effectSize=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  filter(comparison_p==comparison_effectSize)%>%
  pivot_longer(cols = starts_with("q"), names_to = "comparison_q", values_to = "corr_p")%>%
  separate(comparison_q, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_q=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  filter(comparison_p==comparison_q)%>%
  dplyr::select(-c(comparison_q, comparison_effectSize,comparison_status))%>%
  mutate(fdr= as_factor(case_when(corr_p <= 0.05 ~ "*", corr_p <= 0.01 ~ "**", corr_p <= 0.001 ~ "***", corr_p <= 0.1 ~ ".")))%>%
  mutate(taxa= paste(Order, "(Order)"))%>%
  dplyr::select(-Order)
effect_table_sig_long_order$comparison_p <- factor(effect_table_sig_long_order$comparison_p, levels = c("Control_V1","Control_V2", "Control_V3V5","Control_V6V7","Control_V8V9"))

effect_table_sig_long_order%>%
  ggplot(aes (x = comparison_p, y = taxa))+ 
  geom_point (aes (fill = effectSize, shape = as.factor (sign (effectSize)), size = abs (effectSize), color=(fdr))) +
  scale_shape_manual (values = c (25, 24)) + 
  scale_fill_gradient2 (low = "blue", high = "red", mid = "white", midpoint = 0) +
  scale_color_manual(values=c("gray22","gray1","gray85"))+
  geom_text (aes (label = stars.pval (corr_p)))+
  theme_grey() +
  theme(axis.text.x = element_text(), #angle = 0, hjust = 0, vjust = 1
       legend.position = "right",axis.text.y = element_text(face = "italic"))+
  scale_x_discrete(labels=c("1", "3", "6-12", "15-18", "21-24"))+
  labs(x= "months from treatment start")

# dplyr::select the entries which have OK_nc in status with all differences to Control

effect_table_sig_control <- effect_table_order%>%
  filter(status_Control_V2=="OK_nc"|status_Control_V3V5=="OK_nc"|status_Control_V6V7 =="OK_nc" | status_Control_V8V9=="OK_nc"| status_Control_V1=="OK_nc")

#pivot long format

effect_table_sig_long_control_order<- effect_table_sig_control%>%
  pivot_longer(cols = starts_with("status"), names_to = "comparison_status", values_to = "status")%>%
  separate(comparison_status, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_status=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  pivot_longer(cols = starts_with("p"), names_to = "comparison_p", values_to = "raw_p")%>%
  separate(comparison_p, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_p=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  filter(comparison_p==comparison_status)%>%
  pivot_longer(cols = starts_with("d"), names_to = "comparison_effectSize", values_to = "effectSize")%>%
  separate(comparison_effectSize, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_effectSize=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  filter(comparison_p==comparison_effectSize)%>%
  pivot_longer(cols = starts_with("q"), names_to = "comparison_q", values_to = "corr_p")%>%
  separate(comparison_q, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_q=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  filter(comparison_p==comparison_q)%>%
  dplyr::select(-c(comparison_q, comparison_effectSize,comparison_status))%>%
  mutate(fdr= as_factor(case_when(corr_p <= 0.05 ~ "*", corr_p <= 0.01 ~ "**", corr_p <= 0.001 ~ "***", corr_p <= 0.1 ~ ".")))%>%
  mutate(taxa= paste(Order, "(Order)"))%>%
  dplyr::select(-Order)

effect_table_sig_long_control_order$comparison_p <- factor(effect_table_sig_long_control_order$comparison_p, levels = c("Control_V2", "Control_V3V5","Control_V6V7","Control_V8V9","Control_Control"))

effect_table_sig_long_control_order%>%
  mutate(shape_sign= as.factor(sign(effectSize))) %>% 
  filter(shape_sign!=0) %>% #fiter out effect sizes that have no direction
  ggplot(aes (x = comparison_p, y = taxa))+ 
  geom_point (aes (fill = effectSize, shape = shape_sign, size = abs (effectSize), color=(fdr)))+
  scale_shape_manual (values = c (25,24)) + 
  scale_fill_gradient2 (low = "blue", high = "red", mid = "white", midpoint = 0) +
  scale_color_manual(values=c("gray22","gray1","gray85"))+
  geom_text (aes (label = stars.pval (corr_p)))+
  theme_grey() +
  theme(axis.text.x = element_text(), #angle = 0, hjust = 0, vjust = 1
       legend.position = "right",axis.text.y = element_text(face = "italic"))+
  scale_x_discrete(labels=c("3", "6-12", "15-18", "21-24", "Control"))+
  labs(x= "months from treatment start")

class level

psControlV2 <-
 phyloseq::subset_samples(ps_stool_relab_class , visit_cal_cor  %in% c(NA, "2"))

metadata_ControlV2 <- as(sample_data(psControlV2),"data.frame")

metada <- metadata_ControlV2%>%
  mutate(visit_ControlV2 = (as.numeric(visit.x))-1) %>% 
  mutate(visit_ControlV2 = case_when(is.na(visit_ControlV2)~1, T~0))

metada<- cbind(visit_ControlV2 =metada$visit_ControlV2, subset(metada, select = -c(visit_ControlV2))) # brings visit into the first column

metada <- metada%>%
  dplyr::select(visit_ControlV2)

metada$visit_ControlV2
##  [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1
## [39] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
# run metadeconfoundR
features <- as.data.frame(otu_table(psControlV2))
taxtable <- as.data.frame(tax_table(psControlV2))

#create two-column-dataframe containing corresponding "human-readable" names to the "machine-readable" feature names used as row.names in metaDeconfOutput.  
taxtable <- taxtable%>%
  dplyr::select(Class)
taxtable$ASV <- row.names(taxtable)
taxtable<- cbind(ASV=taxtable$ASV,subset(taxtable, select = -c(ASV)))

### run it
metad <- MetaDeconfound(featureMat = features, metaMat = metada,  nnodes=9)#randomVar = list("+ (1|id)", c("id")), 

raw_p_ControlV2 <- metad[1]
corr_p_ControlV2 <- metad[2]
effect_size_ControlV2 <- metad[3]
status_ControlV2 <- metad[4]
psControl_V3V5 <-
 phyloseq::subset_samples(ps_stool_relab_class , visit_cal_cor  %in% c(NA, "3", "4", "5"))

metadata_Control_V3V5 <- as(sample_data(psControl_V3V5),"data.frame")

metadata_Control_V3V5$visit_cal_cor
##   [1] 3    4    5    3    4    3    3    4    5    3    4    3    4    5    3   
##  [16] 4    3    4    5    3    3    5    4    3    4    3    4    5    3    4   
##  [31] 3    3    4    3    4    5    3    4    5    3    4    3    4    5    4   
##  [46] 5    3    4    5    3    4    5    5    3    5    4    3    5    4    5   
##  [61] 5    5    4    5    3    3    <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
##  [76] <NA> <NA> <NA> 4    3    3    4    4    3    <NA> <NA> <NA> <NA> <NA> <NA>
##  [91] <NA> <NA> <NA> <NA> <NA> <NA> <NA> 5    5    5    4    <NA> <NA> <NA> <NA>
## [106] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## [121] <NA>
## Levels: 3 4 5
metadata_Control_V3V5$visit_sum
##   [1] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [10] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [19] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [28] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [37] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [46] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [55] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [64] 3-5     3-5     3-5     Control Control Control Control Control Control
##  [73] Control Control Control Control Control Control 3-5     3-5     3-5    
##  [82] 3-5     3-5     3-5     Control Control Control Control Control Control
##  [91] Control Control Control Control Control Control Control 3-5     3-5    
## [100] 3-5     3-5     Control Control Control Control Control Control Control
## [109] Control Control Control Control Control Control Control Control Control
## [118] Control Control Control Control
## Levels: 3-5 Control
metada <- metadata_Control_V3V5%>%
  mutate(visit_Control_V3V5 = (as.numeric(visit_sum))-1)

metada<- cbind(visit_Control_V3V5 =metada$visit_Control_V3V5, subset(metada, select = -c(visit_Control_V3V5))) # brings visit into the first column

metada <- metada%>%
  dplyr::select(visit_Control_V3V5)

# run metadeconfoundR
features <- as.data.frame(otu_table(psControl_V3V5))
taxtable <- as.data.frame(tax_table(psControl_V3V5))

#create two-column-dataframe containing corresponding "human-readable" names to the "machine-readable" feature names used as row.names in metaDeconfOutput.  
taxtable <- taxtable%>%
  dplyr::select(Class)
taxtable$ASV <- row.names(taxtable)
taxtable<- cbind(ASV=taxtable$ASV,subset(taxtable, select = -c(ASV)))

### run it
metad <- MetaDeconfound(featureMat = features, metaMat = metada,  nnodes=9)#randomVar = list("+ (1|id)", c("id")), 

raw_pControl_V3V5 <- metad[1]
corr_pControl_V3V5 <- metad[2]
effect_sizeControl_V3V5 <- metad[3]
statusControl_V3V5 <- metad[4]
psControl_V6V7 <-
 phyloseq::subset_samples(ps_stool_relab_class , visit_cal_cor  %in% c(NA, "6", "7"))

metadata_Control_V6V7 <- as(sample_data(psControl_V6V7),"data.frame")

metadata_Control_V6V7$visit_cal_cor
##  [1] 6    6    7    6    7    6    6    6    6    6    6    6    6    6    6   
## [16] 6    7    6    7    7    7    6    7    7    7    6    7    7    7    6   
## [31] 7    7    7    7    7    6    7    7    <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## [46] <NA> <NA> <NA> <NA> <NA> 7    <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## [61] <NA> <NA> <NA> <NA> 6    <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## [76] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## Levels: 6 7
metadata_Control_V6V7$visit_sum
##  [1] 6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7    
## [10] 6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7    
## [19] 6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7    
## [28] 6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7    
## [37] 6-7     6-7     Control Control Control Control Control Control Control
## [46] Control Control Control Control Control 6-7     Control Control Control
## [55] Control Control Control Control Control Control Control Control Control
## [64] Control 6-7     Control Control Control Control Control Control Control
## [73] Control Control Control Control Control Control Control Control Control
## [82] Control Control Control Control
## Levels: 6-7 Control
metada <- metadata_Control_V6V7%>%
  mutate(visit_Control_V6V7 = (as.numeric(visit_sum))-1)

metada<- cbind(visit_Control_V6V7 =metada$visit_Control_V6V7, subset(metada, select = -c(visit_Control_V6V7))) # brings visit into the first column

metada <- metada%>%
  dplyr::select(visit_Control_V6V7, id)

# run metadeconfoundR
features <- as.data.frame(otu_table(psControl_V6V7))
taxtable <- as.data.frame(tax_table(psControl_V6V7))

#create two-column-dataframe containing corresponding "human-readable" names to the "machine-readable" feature names used as row.names in metaDeconfOutput.  
taxtable <- taxtable%>%
  dplyr::select(Class)
taxtable$ASV <- row.names(taxtable)
taxtable<- cbind(ASV=taxtable$ASV,subset(taxtable, select = -c(ASV)))

### run it
metad <- MetaDeconfound(featureMat = features, metaMat = metada,   nnodes=9)#randomVar =  c("id"),

raw_p_Control_V6V7 <- metad[1]
corr_p_Control_V6V7 <- metad[2]
effect_size_Control_V6V7 <- metad[3]
status_Control_V6V7 <- metad[4]
psControl_V8V9 <-
 phyloseq::subset_samples(ps_stool_relab_class, visit_cal_cor  %in% c(NA, "8", "9","10"))

metadata_Control_V8V9 <- as(sample_data(psControl_V8V9),"data.frame")

metadata_Control_V8V9$visit_cal_cor
##  [1] 8    8    8    8    8    8    8    8    8    8    8    <NA> <NA> <NA> <NA>
## [16] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> 9    10   9    8    9    9    8   
## [31] 9    9    8    9    9    9    9    8    8    9    9    <NA> <NA> <NA> <NA>
## [46] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> 9    <NA> <NA> <NA> <NA> <NA>
## [61] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## Levels: 8 9 10
metadata_Control_V8V9$visit_sum
##  [1] 8-10    8-10    8-10    8-10    8-10    8-10    8-10    8-10    8-10   
## [10] 8-10    8-10    Control Control Control Control Control Control Control
## [19] Control Control Control Control Control 8-10    8-10    8-10    8-10   
## [28] 8-10    8-10    8-10    8-10    8-10    8-10    8-10    8-10    8-10   
## [37] 8-10    8-10    8-10    8-10    8-10    Control Control Control Control
## [46] Control Control Control Control Control Control Control Control Control
## [55] 8-10    Control Control Control Control Control Control Control Control
## [64] Control Control Control Control Control Control Control Control Control
## [73] Control Control Control
## Levels: 8-10 Control
metada <- metadata_Control_V8V9%>%
  mutate(visitControl_V8V9 = (as.numeric(visit_sum))-1)

metada<- cbind(visitControl_V8V9 =metada$visitControl_V8V9, subset(metada, select = -c(visitControl_V8V9))) # brings visit into the first column

metada <- metada%>%
  dplyr::select(visitControl_V8V9)

# run metadeconfoundR
features <- as.data.frame(otu_table(psControl_V8V9))
taxtable <- as.data.frame(tax_table(psControl_V8V9))

#create two-column-dataframe containing corresponding "human-readable" names to the "machine-readable" feature names used as row.names in metaDeconfOutput.  
taxtable <- taxtable%>%
  dplyr::select(Class)
taxtable$ASV <- row.names(taxtable)
taxtable<- cbind(ASV=taxtable$ASV,subset(taxtable, select = -c(ASV)))

### run it
metad <- MetaDeconfound(featureMat = features, metaMat = metada,  nnodes=9)#randomVar = list("+ (1|id)", c("id")), 

raw_p_Control_V8V9 <- metad[1]
corr_p_Control_V8V9 <- metad[2]
effect_size_Control_V8V9 <- metad[3]
status_Control_V8V9 <- metad[4]
psControl_V1 <-
 phyloseq::subset_samples(ps_stool_relab_class , visit_cal_9  %in% c("Control", "1"))

metadata_Control_Control <- as(sample_data(psControl_V1),"data.frame")

metada <- metadata_Control_Control%>%
  mutate(visitControl_V1 = (as.numeric(visit_sum))-1)

metada<- cbind(visitControl_V1 =metada$visitControl_V1, subset(metada, select = -c(visitControl_V1))) # brings visit into the first column

metada <- metada%>%
  dplyr::select(visitControl_V1)

# run metadeconfoundR
features <- as.data.frame(otu_table(psControl_V1))
taxtable <- as.data.frame(tax_table(psControl_V1))

#create two-column-dataframe containing corresponding "human-readable" names to the "machine-readable" feature names used as row.names in metaDeconfOutput.  
taxtable <- taxtable%>%
  dplyr::select(Class)
taxtable$ASV <- row.names(taxtable)
taxtable<- cbind(ASV=taxtable$ASV,subset(taxtable, select = -c(ASV)))

### run it
metad <- MetaDeconfound(featureMat = features, metaMat = metada,  nnodes=9)

raw_p_Control_V1 <- metad[1]
corr_p_Control_V1 <- metad[2]
effect_size_Control_V1 <- metad[3]
status_Control_V1 <- metad[4]
raw_p <- purrr::map2(raw_p_ControlV2,raw_pControl_V3V5,cbind)
raw_p <- purrr::map2(raw_p,raw_p_Control_V6V7,cbind)
raw_p <- purrr::map2(raw_p,raw_p_Control_V8V9,cbind)
raw_p <- purrr::map2(raw_p,raw_p_Control_V1,cbind)

raw_p_df <- bind_rows(raw_p)
raw_p_df <- data.frame(raw_p_df$Ps)
raw_p_df  <- raw_p_df %>%
  rownames_to_column()%>%
  mutate(p_Control_V2=visit_ControlV2)%>%
  mutate(p_Control_V3V5=visit_Control_V3V5)%>%
  mutate(p_Control_V6V7=visit_Control_V6V7)%>%
  mutate(p_Control_V8V9=visitControl_V8V9)%>%
  mutate(p_Control_V1=visitControl_V1)%>%
  mutate(ASV=rowname)#%>%
  #dplyr::select(-c(1:10))

corr_p <- purrr::map2(corr_p_ControlV2,corr_pControl_V3V5,cbind)
corr_p <- purrr::map2(corr_p,corr_p_Control_V6V7,cbind)
corr_p <- purrr::map2(corr_p,corr_p_Control_V8V9,cbind)
corr_p <- purrr::map2(corr_p,corr_p_Control_V1,cbind)

corr_p_df <- bind_rows(corr_p)
corr_p_df <- data.frame(corr_p_df$Qs)
corr_p_df  <- corr_p_df %>%
  rownames_to_column()%>%
  mutate(q_Control_V2=visit_ControlV2)%>%
  mutate(q_Control_V3V5=visit_Control_V3V5)%>%
  mutate(q_Control_V6V7=visit_Control_V6V7)%>%
  mutate(q_Control_V8V9=visitControl_V8V9)%>%
  mutate(q_Control_V1=visitControl_V1)%>%
  mutate(ASV=rowname)#%>%
  #dplyr::select(-c(1:10))

effect_size <- purrr::map2(effect_size_ControlV2,effect_sizeControl_V3V5,cbind)
effect_size <- purrr::map2(effect_size,effect_size_Control_V6V7,cbind)
effect_size <- purrr::map2(effect_size,effect_size_Control_V8V9,cbind)
effect_size <- purrr::map2(effect_size,effect_size_Control_V1,cbind)

effect_size_df <- bind_rows(effect_size)
effect_size_df <- data.frame(effect_size_df$Ds)
effect_size_df  <- effect_size_df %>%
  rownames_to_column()%>%
  mutate(d_Control_V2=visit_ControlV2)%>%
  mutate(d_Control_V3V5=visit_Control_V3V5)%>%
  mutate(d_Control_V6V7=visit_Control_V6V7)%>%
  mutate(d_Control_V8V9=visitControl_V8V9)%>%
  mutate(d_Control_V1=visitControl_V1)%>%
  mutate(ASV=rowname)#%>%
  #dplyr::select(-c(1:10))

status <- purrr::map2(status_ControlV2,statusControl_V3V5,cbind)
status <- purrr::map2(status,status_Control_V6V7,cbind)
status <- purrr::map2(status,status_Control_V8V9,cbind)
status <- purrr::map2(status,status_Control_V1,cbind)

status_df <- bind_rows(status)
status_df <- data.frame(status_df$status)
status_df  <- status_df %>%
  rownames_to_column()%>%
  mutate(status_Control_V2=visit_ControlV2)%>%
  mutate(status_Control_V3V5=visit_Control_V3V5)%>%
  mutate(status_Control_V6V7=visit_Control_V6V7)%>%
  mutate(status_Control_V8V9=visitControl_V8V9)%>%
  mutate(status_Control_V1=visitControl_V1)%>%
  mutate(ASV=rowname)#%>%
  #dplyr::select(-c(1:10))

effect_table_class <- raw_p_df%>%
  full_join(corr_p_df, by="ASV")%>%
  full_join(effect_size_df, by="ASV")%>%
  full_join(status_df, by="ASV")%>%
  full_join(taxtable, by="ASV")

# dplyr::select the entries which have OK_nc in status
effect_table_sig_class <- effect_table_class%>%
  filter(status_Control_V2=="OK_nc"|status_Control_V3V5=="OK_nc"|status_Control_V6V7 =="OK_nc" | status_Control_V8V9=="OK_nc")

#pivot long format

effect_table_sig_long_class <- effect_table_sig_class%>%
  pivot_longer(cols = starts_with("status"), names_to = "comparison_status", values_to = "status")%>%
  separate(comparison_status, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_status=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  pivot_longer(cols = starts_with("p"), names_to = "comparison_p", values_to = "raw_p")%>%
  separate(comparison_p, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_p=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  filter(comparison_p==comparison_status)%>%
  pivot_longer(cols = starts_with("d"), names_to = "comparison_effectSize", values_to = "effectSize")%>%
  separate(comparison_effectSize, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_effectSize=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  filter(comparison_p==comparison_effectSize)%>%
  pivot_longer(cols = starts_with("q"), names_to = "comparison_q", values_to = "corr_p")%>%
  separate(comparison_q, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_q=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  filter(comparison_p==comparison_q)%>%
  dplyr::select(-c(comparison_q, comparison_effectSize,comparison_status))%>%
  mutate(fdr= as_factor(case_when(corr_p <= 0.05 ~ "*", corr_p <= 0.01 ~ "**", corr_p <= 0.001 ~ "***", corr_p <= 0.1 ~ ".")))%>%
  mutate(taxa= paste(Class, "(Class)"))%>%
  dplyr::select(-Class)
effect_table_sig_long_class$comparison_p <- factor(effect_table_sig_long_class$comparison_p, levels = c("Control_V1","Control_V2", "Control_V3V5","Control_V6V7","Control_V8V9"))

effect_table_sig_long_class%>%
  ggplot(aes (x = comparison_p, y = taxa))+ 
  geom_point (aes (fill = effectSize, shape = as.factor (sign (effectSize)), size = abs (effectSize), color=(fdr))) +
  scale_shape_manual (values = c (25,24)) + 
  scale_fill_gradient2 (low = "blue", high = "red", mid = "white", midpoint = 0) +
  scale_color_manual(values=c("gray22","gray1","gray85"))+
  geom_text (aes (label = stars.pval (corr_p)))+
  theme_grey() +
  theme(axis.text.x = element_text(), #angle = 0, hjust = 0, vjust = 1
       legend.position = "right",axis.text.y = element_text(face = "italic"))+
  scale_x_discrete(labels=c("1", "3", "6-12", "15-18", "21-24"))+
  labs(x= "months from treatment start")

# dplyr::select the entries which have OK_nc in status with all differences to Control

effect_table_sig_control <- effect_table_class%>%
  filter(status_Control_V2=="OK_nc"|status_Control_V3V5=="OK_nc"|status_Control_V6V7 =="OK_nc" | status_Control_V8V9=="OK_nc"| status_Control_V1=="OK_nc")

#pivot long format

effect_table_sig_long_control_class<- effect_table_sig_control%>%
  pivot_longer(cols = starts_with("status"), names_to = "comparison_status", values_to = "status")%>%
  separate(comparison_status, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_status=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  pivot_longer(cols = starts_with("p"), names_to = "comparison_p", values_to = "raw_p")%>%
  separate(comparison_p, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_p=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  filter(comparison_p==comparison_status)%>%
  pivot_longer(cols = starts_with("d"), names_to = "comparison_effectSize", values_to = "effectSize")%>%
  separate(comparison_effectSize, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_effectSize=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  filter(comparison_p==comparison_effectSize)%>%
  pivot_longer(cols = starts_with("q"), names_to = "comparison_q", values_to = "corr_p")%>%
  separate(comparison_q, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_q=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  filter(comparison_p==comparison_q)%>%
  dplyr::select(-c(comparison_q, comparison_effectSize,comparison_status))%>%
  mutate(fdr= as_factor(case_when(corr_p <= 0.05 ~ "*", corr_p <= 0.01 ~ "**", corr_p <= 0.001 ~ "***", corr_p <= 0.1 ~ ".")))%>%
  mutate(taxa= paste(Class, "(Class)"))%>%
  dplyr::select(-Class)

effect_table_sig_long_control_class$comparison_p <- factor(effect_table_sig_long_control_class$comparison_p, levels = c("Control_V1", "Control_V2", "Control_V3V5","Control_V6V7","Control_V8V9"))

effect_table_sig_long_control_class%>%
  mutate(shape_sign= as.factor(sign(effectSize))) %>% 
  filter(shape_sign!=0) %>% #fiter out effect sizes that have no direction
  ggplot(aes (x = comparison_p, y = taxa))+ 
  geom_point (aes (fill = effectSize, shape = shape_sign, size = abs (effectSize), color=(fdr)))+
  scale_shape_manual (values = c (25,24)) + 
  scale_fill_gradient2 (low = "blue", high = "red", mid = "white", midpoint = 0) +
  scale_color_manual(values=c("gray22","gray1","gray85"))+
  geom_text (aes (label = stars.pval (corr_p)))+
  theme_grey() +
  theme(axis.text.x = element_text(), #angle = 0, hjust = 0, vjust = 1
       legend.position = "right",axis.text.y = element_text(face = "italic"))+
  scale_x_discrete(labels=c("1", "3", "6-12", "15-18", "21-24"))+
  labs(x= "months from treatment start")

phylum level

psControlV2 <-
 phyloseq::subset_samples(ps_stool_relab_phylum , visit_cal_cor  %in% c(NA, "2"))

metadata_ControlV2 <- as(sample_data(psControlV2),"data.frame")

metada <- metadata_ControlV2%>%
  mutate(visit_ControlV2 = (as.numeric(visit.x))-1) %>% 
  mutate(visit_ControlV2 = case_when(is.na(visit_ControlV2)~1, T~0))

metada<- cbind(visit_ControlV2 =metada$visit_ControlV2, subset(metada, select = -c(visit_ControlV2))) # brings visit into the first column

metada <- metada%>%
  dplyr::select(visit_ControlV2)

# run metadeconfoundR
features <- as.data.frame(otu_table(psControlV2))
taxtable <- as.data.frame(tax_table(psControlV2))

#create two-column-dataframe containing corresponding "human-readable" names to the "machine-readable" feature names used as row.names in metaDeconfOutput.  
taxtable <- taxtable%>%
  dplyr::select(Phylum)
taxtable$ASV <- row.names(taxtable)
taxtable<- cbind(ASV=taxtable$ASV,subset(taxtable, select = -c(ASV)))

### run it
metad <- MetaDeconfound(featureMat = features, metaMat = metada,  nnodes=9)#randomVar = list("+ (1|id)", c("id")), 

raw_p_ControlV2 <- metad[1]
corr_p_ControlV2 <- metad[2]
effect_size_ControlV2 <- metad[3]
status_ControlV2 <- metad[4]
psControl_V3V5 <-
 phyloseq::subset_samples(ps_stool_relab_phylum , visit_cal_cor  %in% c(NA, "3", "4", "5"))

metadata_Control_V3V5 <- as(sample_data(psControl_V3V5),"data.frame")

metadata_Control_V3V5$visit_cal_cor
##   [1] 3    4    5    3    4    3    3    4    5    3    4    3    4    5    3   
##  [16] 4    3    4    5    3    3    5    4    3    4    3    4    5    3    4   
##  [31] 3    3    4    3    4    5    3    4    5    3    4    3    4    5    4   
##  [46] 5    3    4    5    3    4    5    5    3    5    4    3    5    4    5   
##  [61] 5    5    4    5    3    3    <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
##  [76] <NA> <NA> <NA> 4    3    3    4    4    3    <NA> <NA> <NA> <NA> <NA> <NA>
##  [91] <NA> <NA> <NA> <NA> <NA> <NA> <NA> 5    5    5    4    <NA> <NA> <NA> <NA>
## [106] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## [121] <NA>
## Levels: 3 4 5
metadata_Control_V3V5$visit_sum
##   [1] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [10] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [19] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [28] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [37] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [46] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [55] 3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5     3-5    
##  [64] 3-5     3-5     3-5     Control Control Control Control Control Control
##  [73] Control Control Control Control Control Control 3-5     3-5     3-5    
##  [82] 3-5     3-5     3-5     Control Control Control Control Control Control
##  [91] Control Control Control Control Control Control Control 3-5     3-5    
## [100] 3-5     3-5     Control Control Control Control Control Control Control
## [109] Control Control Control Control Control Control Control Control Control
## [118] Control Control Control Control
## Levels: 3-5 Control
metada <- metadata_Control_V3V5%>%
  mutate(visit_Control_V3V5 = (as.numeric(visit_sum))-1)

metada<- cbind(visit_Control_V3V5 =metada$visit_Control_V3V5, subset(metada, select = -c(visit_Control_V3V5))) # brings visit into the first column

metada <- metada%>%
  dplyr::select(visit_Control_V3V5, id)

# run metadeconfoundR
features <- as.data.frame(otu_table(psControl_V3V5))
taxtable <- as.data.frame(tax_table(psControl_V3V5))

#create two-column-dataframe containing corresponding "human-readable" names to the "machine-readable" feature names used as row.names in metaDeconfOutput.  
taxtable <- taxtable%>%
  dplyr::select(Phylum)
taxtable$ASV <- row.names(taxtable)
taxtable<- cbind(ASV=taxtable$ASV,subset(taxtable, select = -c(ASV)))

### run it
metad <- MetaDeconfound(featureMat = features, metaMat = metada,  nnodes=9)#randomVar = list("+ (1|id)", c("id")), 

raw_pControl_V3V5 <- metad[1]
corr_pControl_V3V5 <- metad[2]
effect_sizeControl_V3V5 <- metad[3]
statusControl_V3V5 <- metad[4]
psControl_V6V7 <-
 phyloseq::subset_samples(ps_stool_relab_phylum , visit_cal_cor  %in% c(NA, "6", "7"))

metadata_Control_V6V7 <- as(sample_data(psControl_V6V7),"data.frame")

metadata_Control_V6V7$visit_cal_cor
##  [1] 6    6    7    6    7    6    6    6    6    6    6    6    6    6    6   
## [16] 6    7    6    7    7    7    6    7    7    7    6    7    7    7    6   
## [31] 7    7    7    7    7    6    7    7    <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## [46] <NA> <NA> <NA> <NA> <NA> 7    <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## [61] <NA> <NA> <NA> <NA> 6    <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## [76] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## Levels: 6 7
metadata_Control_V6V7$visit_sum
##  [1] 6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7    
## [10] 6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7    
## [19] 6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7    
## [28] 6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7     6-7    
## [37] 6-7     6-7     Control Control Control Control Control Control Control
## [46] Control Control Control Control Control 6-7     Control Control Control
## [55] Control Control Control Control Control Control Control Control Control
## [64] Control 6-7     Control Control Control Control Control Control Control
## [73] Control Control Control Control Control Control Control Control Control
## [82] Control Control Control Control
## Levels: 6-7 Control
metada <- metadata_Control_V6V7%>%
  mutate(visit_Control_V6V7 = (as.numeric(visit_sum))-1)

metada<- cbind(visit_Control_V6V7 =metada$visit_Control_V6V7, subset(metada,select = -c(visit_Control_V6V7))) # brings visit into the first column

metada <- metada%>%
  dplyr::select(visit_Control_V6V7, id)

# run metadeconfoundR
features <- as.data.frame(otu_table(psControl_V6V7))
taxtable <- as.data.frame(tax_table(psControl_V6V7))

#create two-column-dataframe containing corresponding "human-readable" names to the "machine-readable" feature names used as row.names in metaDeconfOutput.  
taxtable <- taxtable%>%
  dplyr::select(Phylum)
taxtable$ASV <- row.names(taxtable)
taxtable<- cbind(ASV=taxtable$ASV,subset(taxtable,select = -c(ASV)))

### run it
metad <- MetaDeconfound(featureMat = features, metaMat = metada,  nnodes=9)#randomVar = list("+ (1|id)", c("id")), 

raw_p_Control_V6V7 <- metad[1]
corr_p_Control_V6V7 <- metad[2]
effect_size_Control_V6V7 <- metad[3]
status_Control_V6V7 <- metad[4]
psControl_V8V9 <-
 phyloseq::subset_samples(ps_stool_relab_phylum, visit_cal_cor  %in% c(NA, "8", "9"))

metadata_Control_V8V9 <- as(sample_data(psControl_V8V9),"data.frame")

metadata_Control_V8V9$visit_cal_cor
##  [1] 8    8    8    8    8    8    8    8    8    8    8    <NA> <NA> <NA> <NA>
## [16] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> 9    9    8    9    9    8    9   
## [31] 9    8    9    9    9    9    8    8    9    9    <NA> <NA> <NA> <NA> <NA>
## [46] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> 9    <NA> <NA> <NA> <NA> <NA> <NA>
## [61] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## Levels: 8 9
metadata_Control_V8V9$visit_sum
##  [1] 8-10    8-10    8-10    8-10    8-10    8-10    8-10    8-10    8-10   
## [10] 8-10    8-10    Control Control Control Control Control Control Control
## [19] Control Control Control Control Control 8-10    8-10    8-10    8-10   
## [28] 8-10    8-10    8-10    8-10    8-10    8-10    8-10    8-10    8-10   
## [37] 8-10    8-10    8-10    8-10    Control Control Control Control Control
## [46] Control Control Control Control Control Control Control Control 8-10   
## [55] Control Control Control Control Control Control Control Control Control
## [64] Control Control Control Control Control Control Control Control Control
## [73] Control Control
## Levels: 8-10 Control
metada <- metadata_Control_V8V9%>%
  mutate(visitControl_V8V9 = (as.numeric(visit_sum))-1)

metada<- cbind(visitControl_V8V9 =metada$visitControl_V8V9, subset(metada,select = -c(visitControl_V8V9))) # brings visit into the first column

metada <- metada%>%
  dplyr::select(visitControl_V8V9, id)

# run metadeconfoundR
features <- as.data.frame(otu_table(psControl_V8V9))
taxtable <- as.data.frame(tax_table(psControl_V8V9))

#create two-column-dataframe containing corresponding "human-readable" names to the "machine-readable" feature names used as row.names in metaDeconfOutput.  
taxtable <- taxtable%>%
  dplyr::select(Phylum)
taxtable$ASV <- row.names(taxtable)
taxtable<- cbind(ASV=taxtable$ASV,subset(taxtable,select = -c(ASV)))

### run it
metad <- MetaDeconfound(featureMat = features, metaMat = metada,  nnodes=9)#randomVar = list("+ (1|id)", c("id")), 

raw_p_Control_V8V9 <- metad[1]
corr_p_Control_V8V9 <- metad[2]
effect_size_Control_V8V9 <- metad[3]
status_Control_V8V9 <- metad[4]
psControl_V1 <-
 phyloseq::subset_samples(ps_stool_relab_phylum , visit_cal_cor  %in% c(NA, "1"))

metadata_Control_Control <- as(sample_data(psControl_V1),"data.frame")

metada <- metadata_Control_Control%>%
  mutate(visitControl_V1 = (as.numeric(visit_sum))-1)

metada<- cbind(visitControl_V1 =metada$visitControl_V1, subset(metada, select = -c(visitControl_V1))) # brings visit into the first column

metada <- metada%>%
  dplyr::select(visitControl_V1, id)

# run metadeconfoundR
features <- as.data.frame(otu_table(psControl_V1))
taxtable <- as.data.frame(tax_table(psControl_V1))

#create two-column-dataframe containing corresponding "human-readable" names to the "machine-readable" feature names used as row.names in metaDeconfOutput.  
taxtable <- taxtable%>%
  dplyr::select(Phylum)
taxtable$ASV <- row.names(taxtable)
taxtable<- cbind(ASV=taxtable$ASV,subset(taxtable, select = -c(ASV)))

### run it
metad <- MetaDeconfound(featureMat = features, metaMat = metada,  nnodes=9)

raw_p_Control_V1 <- metad[1]
corr_p_Control_V1 <- metad[2]
effect_size_Control_V1 <- metad[3]
status_Control_V1 <- metad[4]
raw_p <- purrr::map2(raw_p_ControlV2,raw_pControl_V3V5,cbind)
raw_p <- purrr::map2(raw_p,raw_p_Control_V6V7,cbind)
raw_p <- purrr::map2(raw_p,raw_p_Control_V8V9,cbind)
raw_p <- purrr::map2(raw_p,raw_p_Control_V1,cbind)

raw_p_df <- bind_rows(raw_p)
raw_p_df <- data.frame(raw_p_df$Ps)
raw_p_df  <- raw_p_df %>%
  rownames_to_column()%>%
  mutate(p_Control_V2=visit_ControlV2)%>%
  mutate(p_Control_V3V5=visit_Control_V3V5)%>%
  mutate(p_Control_V6V7=visit_Control_V6V7)%>%
  mutate(p_Control_V8V9=visitControl_V8V9)%>%
  mutate(p_Control_V1=visitControl_V1)%>%
  mutate(ASV=rowname)#%>%
  #dplyr::select(-c(1:10))

corr_p <- purrr::map2(corr_p_ControlV2,corr_pControl_V3V5,cbind)
corr_p <- purrr::map2(corr_p,corr_p_Control_V6V7,cbind)
corr_p <- purrr::map2(corr_p,corr_p_Control_V8V9,cbind)
corr_p <- purrr::map2(corr_p,corr_p_Control_V1,cbind)

corr_p_df <- bind_rows(corr_p)
corr_p_df <- data.frame(corr_p_df$Qs)
corr_p_df  <- corr_p_df %>%
  rownames_to_column()%>%
  mutate(q_Control_V2=visit_ControlV2)%>%
  mutate(q_Control_V3V5=visit_Control_V3V5)%>%
  mutate(q_Control_V6V7=visit_Control_V6V7)%>%
  mutate(q_Control_V8V9=visitControl_V8V9)%>%
  mutate(q_Control_V1=visitControl_V1)%>%
  mutate(ASV=rowname)#%>%
  #dplyr::select(-c(1:10))

effect_size <- purrr::map2(effect_size_ControlV2,effect_sizeControl_V3V5,cbind)
effect_size <- purrr::map2(effect_size,effect_size_Control_V6V7,cbind)
effect_size <- purrr::map2(effect_size,effect_size_Control_V8V9,cbind)
effect_size <- purrr::map2(effect_size,effect_size_Control_V1,cbind)

effect_size_df <- bind_rows(effect_size)
effect_size_df <- data.frame(effect_size_df$Ds)
effect_size_df  <- effect_size_df %>%
  rownames_to_column()%>%
  mutate(d_Control_V2=visit_ControlV2)%>%
  mutate(d_Control_V3V5=visit_Control_V3V5)%>%
  mutate(d_Control_V6V7=visit_Control_V6V7)%>%
  mutate(d_Control_V8V9=visitControl_V8V9)%>%
  mutate(d_Control_V1=visitControl_V1)%>%
  mutate(ASV=rowname)#%>%
  #dplyr::select(-c(1:10))

status <- purrr::map2(status_ControlV2,statusControl_V3V5,cbind)
status <- purrr::map2(status,status_Control_V6V7,cbind)
status <- purrr::map2(status,status_Control_V8V9,cbind)
status <- purrr::map2(status,status_Control_V1,cbind)

status_df <- bind_rows(status)
status_df <- data.frame(status_df$status)
status_df  <- status_df %>%
  rownames_to_column()%>%
  mutate(status_Control_V2=visit_ControlV2)%>%
  mutate(status_Control_V3V5=visit_Control_V3V5)%>%
  mutate(status_Control_V6V7=visit_Control_V6V7)%>%
  mutate(status_Control_V8V9=visitControl_V8V9)%>%
  mutate(status_Control_V1=visitControl_V1)%>%
  mutate(ASV=rowname)#%>%
  #dplyr::select(-c(1:10))

effect_table_phylum <- raw_p_df%>%
  full_join(corr_p_df, by="ASV")%>%
  full_join(effect_size_df, by="ASV")%>%
  full_join(status_df, by="ASV")%>%
  full_join(taxtable, by="ASV")

# dplyr::select the entries which have OK_nc in status
effect_table_sig_phylum <- effect_table_phylum%>%
  filter(status_Control_V1=="OK_nc"|status_Control_V2=="OK_nc"|status_Control_V3V5=="OK_nc"|status_Control_V6V7 =="OK_nc" | status_Control_V8V9=="OK_nc")

#pivot long format

effect_table_sig_long_phylum <- effect_table_sig_phylum%>%
  pivot_longer(cols = starts_with("status"), names_to = "comparison_status", values_to = "status")%>%
  separate(comparison_status, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_status=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  pivot_longer(cols = starts_with("p_"), names_to = "comparison_p", values_to = "raw_p")%>%
  separate(comparison_p, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_p=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  filter(comparison_p==comparison_status)%>%
  pivot_longer(cols = starts_with("d"), names_to = "comparison_effectSize", values_to = "effectSize")%>%
  separate(comparison_effectSize, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_effectSize=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  filter(comparison_p==comparison_effectSize)%>%
  pivot_longer(cols = starts_with("q"), names_to = "comparison_q", values_to = "corr_p")%>%
  separate(comparison_q, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_q=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  filter(comparison_p==comparison_q)%>%
  dplyr::select(-c(comparison_q, comparison_effectSize,comparison_status))%>%
  mutate(fdr= as_factor(case_when(corr_p <= 0.05 ~ "*", corr_p <= 0.01 ~ "**", corr_p <= 0.001 ~ "***", corr_p <= 0.1 ~ ".")))%>%
  mutate(taxa= paste(Phylum, "(Phylum)"))%>%
  dplyr::select(-Phylum)
effect_table_sig_long_phylum$comparison_p <- factor(effect_table_sig_long_phylum$comparison_p, levels = c("Control_V1","Control_V2", "Control_V3V5","Control_V6V7","Control_V8V9"))

effect_table_sig_long_phylum%>%
  ggplot(aes (x = comparison_p, y = taxa))+ 
  geom_point (aes (fill = effectSize, shape = as.factor (sign (effectSize)), size = abs (effectSize), color=(fdr))) +
  scale_shape_manual (values = c (25, 24)) + 
  scale_fill_gradient2 (low = "blue", high = "red", mid = "white", midpoint = 0) +
  scale_color_manual(values=c("gray22","gray1","gray85"))+
  geom_text (aes (label = stars.pval (corr_p)))+
  theme_grey() +
  theme(axis.text.x = element_text(), #angle = 0, hjust = 0, vjust = 1
       legend.position = "right",axis.text.y = element_text(face = "italic"))+
  scale_x_discrete(labels=c("1", "3", "6-12", "15-18", "21-24"))+
  labs(x= "months from treatment start")
# dplyr::select the entries which have OK_nc in status with all differences to Control

effect_table_sig_control <- effect_table_phylum%>%
  filter(status_Control_V2=="OK_nc"|status_Control_V3V5=="OK_nc"|status_Control_V6V7 =="OK_nc" | status_Control_V8V9=="OK_nc"| status_Control_V1=="OK_nc")

#pivot long format

effect_table_sig_long_control_phylum<- effect_table_sig_control%>%
  pivot_longer(cols = starts_with("status"), names_to = "comparison_status", values_to = "status")%>%
  separate(comparison_status, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_status=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  pivot_longer(cols = starts_with("p_"), names_to = "comparison_p", values_to = "raw_p")%>%
  separate(comparison_p, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_p=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  filter(comparison_p==comparison_status)%>%
  pivot_longer(cols = starts_with("d"), names_to = "comparison_effectSize", values_to = "effectSize")%>%
  separate(comparison_effectSize, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_effectSize=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  filter(comparison_p==comparison_effectSize)%>%
  pivot_longer(cols = starts_with("q"), names_to = "comparison_q", values_to = "corr_p")%>%
  separate(comparison_q, c("variable","Visit1", "Visit_Sum"), "_")%>%
  mutate(comparison_q=paste(Visit1,Visit_Sum, sep="_"))%>%
  dplyr::select(-c(variable, Visit1, Visit_Sum))%>%
  filter(comparison_p==comparison_q)%>%
  dplyr::select(-c(comparison_q, comparison_effectSize,comparison_status))%>%
  mutate(fdr= as_factor(case_when(corr_p <= 0.05 ~ "*", corr_p <= 0.01 ~ "**", corr_p <= 0.001 ~ "***", corr_p <= 0.1 ~ ".")))%>%
  mutate(taxa= paste(Phylum, "(Phylum)"))%>%
  dplyr::select(-Phylum)

effect_table_sig_long_control_phylum$comparison_p <- factor(effect_table_sig_long_control_phylum$comparison_p, levels = c("Control_V1", "Control_V2", "Control_V3V5","Control_V6V7","Control_V8V9"))

effect_table_sig_long_control_phylum%>%
  mutate(shape_sign= as.factor(sign(effectSize))) %>% 
  filter(shape_sign!=0) %>% #fiter out effect sizes that have no direction
  ggplot(aes (x = comparison_p, y = taxa))+ 
  geom_point (aes (fill = effectSize, shape = shape_sign, size = abs (effectSize), color=(fdr)))+
  scale_shape_manual (values = c (25,24)) + 
  scale_fill_gradient2 (low = "blue", high = "red", mid = "white", midpoint = 0) +
  scale_color_manual(values=c("gray22","gray1","gray85"))+
  geom_text (aes (label = stars.pval (corr_p)))+
  theme_grey() +
  theme(axis.text.x = element_text(), #angle = 0, hjust = 0, vjust = 1
       legend.position = "right",axis.text.y = element_text(face = "italic"))+
  scale_x_discrete(labels=c("1", "3", "6-12", "15-18", "21-24"))+
  labs(x= "months from treatment start")

Combine different tax levels into one plot

# combine output tabels
# Select columns from each dataframe
effect_table_sig_long_control <- effect_table_sig_long_control %>%
  dplyr::select(ASV, status, raw_p, comparison_p, effectSize, corr_p, fdr, taxa)

effect_table_sig_long_control_family <- effect_table_sig_long_control_family %>%
  dplyr::select(ASV, status, raw_p, comparison_p, effectSize, corr_p, fdr, taxa)

effect_table_sig_long_control_class <- effect_table_sig_long_control_class %>%
  dplyr::select(ASV, status, raw_p, comparison_p, effectSize, corr_p, fdr, taxa)

effect_table_sig_long_control_phylum <- effect_table_sig_long_control_phylum %>%
 dplyr::select(ASV, status, raw_p, comparison_p, effectSize, corr_p, fdr, taxa)

# bind them
effect_table_sig_all_control <- rbind(effect_table_sig_long_control, effect_table_sig_long_control_family, effect_table_sig_long_control_class, effect_table_sig_long_control_phylum)

# no significant changes on order level observed;  effect_table_sig_long_control_order gets not included here

# retrieve table for supplement
effect_table_sig_all_control_write <- effect_table_sig_all_control %>% 
  mutate(taxa=gsub("ASV\\d{3}_", "", taxa))

write_csv(effect_table_sig_all_control_write, "/Users/rebecca/Documents/Forschung/IMMProveCF/R_analysis/data/effect_size_table_stool_ControlTimepoints.csv")

#writexl::write_xlsx(effect_table_sig_all_control_write, "/Users/rebecca/Documents/Forschung/IMMProveCF/Manuscript_16S/Submission_CHM/SuppMaterial/SuppTable_18.xlsx")

effect_table_sig_all_control %>%
  mutate(taxa=gsub("ASV\\d{3}_", "", taxa)) %>% 
  arrange(ASV) %>%
  ggplot(aes(x = comparison_p, y = taxa)) + 
  geom_point(
    aes(fill = effectSize, shape = as.factor(sign(effectSize)), size = abs(effectSize), color = fdr)
  ) +
  scale_size_continuous(name = "Absolute effect size (Cliff's delta)", guide = guide_legend(ncol = 5)) + 
  scale_shape_manual(name = "", labels = c("decreased","increased"), values = c(25,24),guide = guide_legend(ncol = 2)) + 
  scale_fill_gradient2(
    name = "Effect size (Cliff's delta)",
    low = "#746999", high = "#69b3a2", mid = "white", midpoint = 0) +
  scale_color_manual(
    name = "", labels = c(". fdr < 0.1", "* fdr < 0.05", "non-significant"),
    values = c("gray22", "gray1", "gray85"), guide = guide_legend(ncol = 3)
  ) +
  geom_text(aes(label = fdr)) +
  theme_grey() +
  theme(
    axis.text.x = element_text(size = 17),
    axis.title.x = element_text(size = 17),
    legend.position = "right",
    axis.text.y = element_text(face = "italic", size = 16),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 17),
    legend.key = element_rect(size = 8),
    legend.key.size = unit(1.5, 'lines')
  ) +
  scale_x_discrete(labels = c("1", "3", "6-12", "15-18", "21-24")) +
  labs(x = "months from treatment start", y = "")
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 110 rows containing missing values or values outside the scale range
## (`geom_text()`).

#ggsave("/Users/rebecca/Documents/Forschung/IMMProveCF/R_analysis/figures/relAb_cuneiform_plot_stool_Controls.png", width = 11, height = 5)